From a5b3f117daead61c3c9c88cd1159d38fa4ad1362 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 16 Dec 2024 12:27:07 +0000 Subject: [PATCH 01/16] btrfs: move btrfs_is_empty_uuid() from ioctl.c into fs.c It's a generic helper not specific to ioctls and used in several places, so move it out from ioctl.c and into fs.c. While at it change its return type from int to bool and declare the loop variable in the loop itself. This also slightly reduces the module's size. Before this change: $ size fs/btrfs/btrfs.ko text data bss dec hex filename 1781492 161037 16920 1959449 1de619 fs/btrfs/btrfs.ko After this change: $ size fs/btrfs/btrfs.ko text data bss dec hex filename 1781340 161037 16920 1959297 1de581 fs/btrfs/btrfs.ko Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/fs.c | 9 +++++++++ fs/btrfs/fs.h | 2 ++ fs/btrfs/ioctl.c | 11 ----------- fs/btrfs/ioctl.h | 1 - 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c index 09cfb43580cb..06a863252a85 100644 --- a/fs/btrfs/fs.c +++ b/fs/btrfs/fs.c @@ -55,6 +55,15 @@ size_t __attribute_const__ btrfs_get_num_csums(void) return ARRAY_SIZE(btrfs_csums); } +bool __pure btrfs_is_empty_uuid(const u8 *uuid) +{ + for (int i = 0; i < BTRFS_UUID_SIZE; i++) { + if (uuid[i] != 0) + return false; + } + return true; +} + /* * Start exclusive operation @type, return true on success. */ diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index b05f2af97140..15c26c6f4d6e 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -988,6 +988,8 @@ const char *btrfs_super_csum_name(u16 csum_type); const char *btrfs_super_csum_driver(u16 csum_type); size_t __attribute_const__ btrfs_get_num_csums(void); +bool __pure btrfs_is_empty_uuid(const u8 *uuid); + /* Compatibility and incompatibility defines */ void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, const char *name); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 243d08f37c58..415b20801d78 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -471,17 +471,6 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info, return ret; } -int __pure btrfs_is_empty_uuid(const u8 *uuid) -{ - int i; - - for (i = 0; i < BTRFS_UUID_SIZE; i++) { - if (uuid[i]) - return 0; - } - return 1; -} - /* * Calculate the number of transaction items to reserve for creating a subvolume * or snapshot, not including the inode, directory entries, or parent directory. diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 2b760c8778f8..ce915fcda43b 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -19,7 +19,6 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int btrfs_ioctl_get_supported_features(void __user *arg); void btrfs_sync_inode_flags_to_i_flags(struct inode *inode); -int __pure btrfs_is_empty_uuid(const u8 *uuid); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs); int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); -- 2.51.0 From 2205302298af2036e9c164fca025ba7a1ab2c816 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 16 Dec 2024 12:58:09 +0000 Subject: [PATCH 02/16] btrfs: move the folio ordered helpers from ctree.h into fs.h The folio ordered helper macros are defined at ctree.h but this is not the best place since ctree.{h,c} is all about the btree data structure implementation and not a generic module. So move these macros into the fs.h header. Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 8 -------- fs/btrfs/fs.h | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a1bab0b3f193..3d9855d30057 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -756,12 +756,4 @@ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID; } -/* - * We use folio flag owner_2 to indicate there is an ordered extent with - * unfinished IO. - */ -#define folio_test_ordered(folio) folio_test_owner_2(folio) -#define folio_set_ordered(folio) folio_set_owner_2(folio) -#define folio_clear_ordered(folio) folio_clear_owner_2(folio) - #endif diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 15c26c6f4d6e..7a27f5fe9bc2 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -1066,6 +1066,14 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ &(fs_info)->fs_state))) +/* + * We use folio flag owner_2 to indicate there is an ordered extent with + * unfinished IO. + */ +#define folio_test_ordered(folio) folio_test_owner_2(folio) +#define folio_set_ordered(folio) folio_set_owner_2(folio) +#define folio_clear_ordered(folio) folio_clear_owner_2(folio) + #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #define EXPORT_FOR_TESTS -- 2.51.0 From a4545b74e2de98989c1d14bc48c52c2f9fa734b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 16 Dec 2024 16:18:35 +0000 Subject: [PATCH 03/16] btrfs: move BTRFS_BYTES_TO_BLKS() into fs.h Currently BTRFS_BYTES_TO_BLKS() is defined in ctree.h but it's not related at all to the btree data structure, so move it into fs.h. Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 --- fs/btrfs/fs.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3d9855d30057..bf054470dcd0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -506,9 +506,6 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item); } -#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ - ((bytes) >> (fs_info)->sectorsize_bits) - static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) { return mapping_gfp_constraint(mapping, ~__GFP_FS); diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 7a27f5fe9bc2..dd1a82297d4c 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -953,6 +953,8 @@ static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info, #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ sizeof(struct btrfs_item)) +#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) ((bytes) >> (fs_info)->sectorsize_bits) + static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) { return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > 0; -- 2.51.0 From 378f25d3fc429ad001fa686f615df5c0f9cd47e1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 16 Dec 2024 16:22:54 +0000 Subject: [PATCH 04/16] btrfs: move btrfs_alloc_write_mask() into fs.h Currently btrfs_alloc_write_mask() is defined in ctree.h but it's not related at all to the btree data structure, so move it into fs.h. Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 ------ fs/btrfs/fs.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bf054470dcd0..53f9fc04f66f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -7,7 +7,6 @@ #define BTRFS_CTREE_H #include "linux/cleanup.h" -#include #include #include #include @@ -506,11 +505,6 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item); } -static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) -{ - return mapping_gfp_constraint(mapping, ~__GFP_FS); -} - void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end); int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 *actual_bytes); diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index dd1a82297d4c..1113646374f3 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -887,6 +888,11 @@ struct btrfs_fs_info { #define inode_to_fs_info(_inode) (BTRFS_I(_Generic((_inode), \ struct inode *: (_inode)))->root->fs_info) +static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) +{ + return mapping_gfp_constraint(mapping, ~__GFP_FS); +} + static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info) { return READ_ONCE(fs_info->generation); -- 2.51.0 From 07174a34295767389383fd4e27da2a41ebb6966e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 16 Dec 2024 16:29:45 +0000 Subject: [PATCH 05/16] btrfs: move extent-tree function declarations out of ctree.h We have 3 functions that have their prototypes declared in ctree.h but they are defined at extent-tree.c and they are unrelated to the btree data structure. Move the prototypes out of ctree.h and into extent-tree.h. Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 ----- fs/btrfs/extent-tree.h | 4 ++++ fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/volumes.c | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 53f9fc04f66f..cdf10cca8194 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,11 +505,6 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item); } -void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end); -int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, - u64 num_bytes, u64 *actual_bytes); -int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range); - /* ctree.c */ int __init btrfs_ctree_init(void); void __cold btrfs_ctree_exit(void); diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h index 46b8e19022df..cfa52264f678 100644 --- a/fs/btrfs/extent-tree.h +++ b/fs/btrfs/extent-tree.h @@ -162,5 +162,9 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent); +void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end); +int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, + u64 num_bytes, u64 *actual_bytes); +int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range); #endif diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cfa52ef40b06..17707c898eae 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -12,7 +12,7 @@ #include #include #include -#include "ctree.h" +#include "extent-tree.h" #include "fs.h" #include "messages.h" #include "misc.h" diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fcd80ba9dd42..d32913c51d69 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -13,8 +13,8 @@ #include #include #include "misc.h" -#include "ctree.h" #include "disk-io.h" +#include "extent-tree.h" #include "transaction.h" #include "volumes.h" #include "raid56.h" -- 2.51.0 From de9c8265b763f98b4b8f7f13acf4888285ac5a47 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 16 Dec 2024 16:36:19 +0000 Subject: [PATCH 06/16] btrfs: remove pointless comment from ctree.h It's pointless to have a comment above the prototype declarations of btrfs_ctree_init() and btrfs_ctree_exit() mentioning that they are declared in ctree.c. This is from the old days when ctree.h was used to place anything that didn't fit in any other file. So remove it. Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cdf10cca8194..1096a80a64e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,7 +505,6 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item); } -/* ctree.c */ int __init btrfs_ctree_init(void); void __cold btrfs_ctree_exit(void); -- 2.51.0 From 6a2b3d7a36df2c7a7ad3a8ef00bc4ea194221c02 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 17 Dec 2024 12:00:39 +0000 Subject: [PATCH 07/16] btrfs: use uuid_is_null() to verify if an uuid is empty At btrfs_is_empty_uuid() we have our custom code to check if an uuid is empty, however there a kernel uuid library that has a function named uuid_is_null() which does the same and probably more efficient. So change btrfs_is_empty_uuid() to use uuid_is_null(), which is almost a directly replacement, it just wraps the necessary casting since our uuid types are u8 arrays while the uuid kernel library uses the uuid_t type, which is just a typedef of an u8 array of 16 elements as well. Also since the function is now to trivial, make it a static inline function in fs.h. Suggested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/fs.c | 9 --------- fs/btrfs/fs.h | 5 ++++- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c index 06a863252a85..09cfb43580cb 100644 --- a/fs/btrfs/fs.c +++ b/fs/btrfs/fs.c @@ -55,15 +55,6 @@ size_t __attribute_const__ btrfs_get_num_csums(void) return ARRAY_SIZE(btrfs_csums); } -bool __pure btrfs_is_empty_uuid(const u8 *uuid) -{ - for (int i = 0; i < BTRFS_UUID_SIZE; i++) { - if (uuid[i] != 0) - return false; - } - return true; -} - /* * Start exclusive operation @type, return true on success. */ diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 1113646374f3..58e6b4b953f1 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -996,7 +996,10 @@ const char *btrfs_super_csum_name(u16 csum_type); const char *btrfs_super_csum_driver(u16 csum_type); size_t __attribute_const__ btrfs_get_num_csums(void); -bool __pure btrfs_is_empty_uuid(const u8 *uuid); +static inline bool btrfs_is_empty_uuid(const u8 *uuid) +{ + return uuid_is_null((const uuid_t *)uuid); +} /* Compatibility and incompatibility defines */ void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, -- 2.51.0 From 882af9f13e830c0a4ef696bb72cd5998a5067a93 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 19 Dec 2024 15:04:08 +1030 Subject: [PATCH 08/16] btrfs: handle free space tree rebuild in multiple transactions During free space tree rebuild, we're holding a transaction handle for the whole rebuild process. This can lead to blocked task warning, e.g. btrfs-transaction kthread (which is already created before btrfs_start_pre_rw_mount()) can be waked up to join and commit the current transaction. But the free space tree rebuild process may need to go through thousands block groups, this will block btrfs-transaction kthread for a long time. Fix the problem by calling btrfs_should_end_transaction() after each block group, so that we won't hold the transaction handle too long. And since the free-space-tree rebuild can be split into multiple transactions, we need to consider the safety when the rebuild process is interrupted. Thankfully since we only set the FREE_SPACE_TREE compat_ro flag without FREE_SPACE_TREE_VALID flag, even if the rebuild is interrupted, on the next RW mount, we will still go rebuild the free space tree, by deleting any items we have and re-starting the rebuild from scratch. Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/free-space-tree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 7ba50e133921..2400fa5a5be4 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1350,6 +1350,12 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_end_transaction(trans); return ret; } + if (btrfs_should_end_transaction(trans)) { + btrfs_end_transaction(trans); + trans = btrfs_start_transaction(free_space_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } node = rb_next(node); } -- 2.51.0 From 57e421867b7aa60783dac3d4caf97af74263f5f5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 17 Dec 2024 08:05:43 +0100 Subject: [PATCH 09/16] btrfs: don't include linux/rwlock_types.h directly The header clearly states that it does not want to be included directly, only via linux/spinlock_types.h. Drop this as we can simply use the spinlock.h which is already included. Signed-off-by: Wolfram Sang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 58e6b4b953f1..be8c32d1a7bb 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include -- 2.51.0 From 90dde9a13c0020ce140bc8d27c1f4c48a070cc97 Mon Sep 17 00:00:00 2001 From: "Roger L. Beckermeyer III" Date: Wed, 18 Dec 2024 08:28:50 +1030 Subject: [PATCH 10/16] rbtree: add rb_find_add_cached() to rbtree.h Adds rb_find_add_cached() as a helper function for use with red-black trees. Used in btrfs to reduce boilerplate code. And since it's a new helper, the cmp() function will require both parameter to be const rb_node pointers. Suggested-by: Josef Bacik Signed-off-by: Roger L. Beckermeyer III Acked-by: Peter Zijlstra (Intel) Reviewed-by: Qu Wenruo Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/rbtree.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 7c173aa64e1e..8d2ba3749866 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -210,6 +210,43 @@ rb_add(struct rb_node *node, struct rb_root *tree, rb_insert_color(node, tree); } +/** + * rb_find_add_cached() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add_cached(struct rb_node *node, struct rb_root_cached *tree, + int (*cmp)(const struct rb_node *new, const struct rb_node *exist)) +{ + bool leftmost = true; + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) { + link = &parent->rb_left; + } else if (c > 0) { + link = &parent->rb_right; + leftmost = false; + } else { + return parent; + } + } + + rb_link_node(node, parent, link); + rb_insert_color_cached(node, tree, leftmost); + return NULL; +} + /** * rb_find_add() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert -- 2.51.0 From 372484f2c27c5ebd2b37d6c7d3f92678fc8c07af Mon Sep 17 00:00:00 2001 From: "Roger L. Beckermeyer III" Date: Wed, 18 Dec 2024 08:28:51 +1030 Subject: [PATCH 11/16] btrfs: update btrfs_add_block_group_cache() to use rb helper Update fs/btrfs/block-group.c to use rb_find_add_cached(). Suggested-by: Josef Bacik Signed-off-by: Roger L. Beckermeyer III Reviewed-by: Qu Wenruo Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 46 ++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5be029734cfa..39881d66cfa0 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -173,43 +173,41 @@ void btrfs_put_block_group(struct btrfs_block_group *cache) } } +static int btrfs_bg_start_cmp(const struct rb_node *new, + const struct rb_node *exist) +{ + const struct btrfs_block_group *new_bg = + rb_entry(new, struct btrfs_block_group, cache_node); + const struct btrfs_block_group *exist_bg = + rb_entry(exist, struct btrfs_block_group, cache_node); + + if (new_bg->start < exist_bg->start) + return -1; + if (new_bg->start > exist_bg->start) + return 1; + return 0; +} + /* * This adds the block group to the fs_info rb tree for the block group cache */ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, struct btrfs_block_group *block_group) { - struct rb_node **p; - struct rb_node *parent = NULL; - struct btrfs_block_group *cache; - bool leftmost = true; + struct rb_node *exist; + int ret = 0; ASSERT(block_group->length != 0); write_lock(&info->block_group_cache_lock); - p = &info->block_group_cache_tree.rb_root.rb_node; - - while (*p) { - parent = *p; - cache = rb_entry(parent, struct btrfs_block_group, cache_node); - if (block_group->start < cache->start) { - p = &(*p)->rb_left; - } else if (block_group->start > cache->start) { - p = &(*p)->rb_right; - leftmost = false; - } else { - write_unlock(&info->block_group_cache_lock); - return -EEXIST; - } - } - - rb_link_node(&block_group->cache_node, parent, p); - rb_insert_color_cached(&block_group->cache_node, - &info->block_group_cache_tree, leftmost); + exist = rb_find_add_cached(&block_group->cache_node, + &info->block_group_cache_tree, btrfs_bg_start_cmp); + if (exist) + ret = -EEXIST; write_unlock(&info->block_group_cache_lock); - return 0; + return ret; } /* -- 2.51.0 From 14ae60c71221d3ec64482476262e5b9eb4494be4 Mon Sep 17 00:00:00 2001 From: "Roger L. Beckermeyer III" Date: Wed, 18 Dec 2024 08:28:52 +1030 Subject: [PATCH 12/16] btrfs: update prelim_ref_insert() to use rb helpers Update prelim_ref_insert() to use rb_find_add_cached(). There is a special change that the existing prelim_ref_compare() is called with the first parameter as the existing ref in the rbtree. But the newer rb_find_add_cached() expects the cmp() function to have the first parameter as the to-be-added node, thus the new helper prelim_ref_rb_add_cmp() need to adapt this new order. Signed-off-by: Roger L. Beckermeyer III Reviewed-by: Qu Wenruo Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 79 +++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6d9f39c1d89c..3d3923cfc357 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -250,6 +250,21 @@ static int prelim_ref_compare(const struct prelim_ref *ref1, return 0; } +static int prelim_ref_rb_add_cmp(const struct rb_node *new, + const struct rb_node *exist) +{ + const struct prelim_ref *ref_new = + rb_entry(new, struct prelim_ref, rbnode); + const struct prelim_ref *ref_exist = + rb_entry(exist, struct prelim_ref, rbnode); + + /* + * prelim_ref_compare() expects the first parameter as the existing one, + * different from the rb_find_add_cached() order. + */ + return prelim_ref_compare(ref_exist, ref_new); +} + static void update_share_count(struct share_check *sc, int oldcount, int newcount, const struct prelim_ref *newref) { @@ -278,55 +293,39 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info, struct share_check *sc) { struct rb_root_cached *root; - struct rb_node **p; - struct rb_node *parent = NULL; - struct prelim_ref *ref; - int result; - bool leftmost = true; + struct rb_node *exist; root = &preftree->root; - p = &root->rb_root.rb_node; - - while (*p) { - parent = *p; - ref = rb_entry(parent, struct prelim_ref, rbnode); - result = prelim_ref_compare(ref, newref); - if (result < 0) { - p = &(*p)->rb_left; - } else if (result > 0) { - p = &(*p)->rb_right; - leftmost = false; - } else { - /* Identical refs, merge them and free @newref */ - struct extent_inode_elem *eie = ref->inode_list; + exist = rb_find_add_cached(&newref->rbnode, root, prelim_ref_rb_add_cmp); + if (exist) { + struct prelim_ref *ref = rb_entry(exist, struct prelim_ref, rbnode); + /* Identical refs, merge them and free @newref */ + struct extent_inode_elem *eie = ref->inode_list; - while (eie && eie->next) - eie = eie->next; + while (eie && eie->next) + eie = eie->next; - if (!eie) - ref->inode_list = newref->inode_list; - else - eie->next = newref->inode_list; - trace_btrfs_prelim_ref_merge(fs_info, ref, newref, - preftree->count); - /* - * A delayed ref can have newref->count < 0. - * The ref->count is updated to follow any - * BTRFS_[ADD|DROP]_DELAYED_REF actions. - */ - update_share_count(sc, ref->count, - ref->count + newref->count, newref); - ref->count += newref->count; - free_pref(newref); - return; - } + if (!eie) + ref->inode_list = newref->inode_list; + else + eie->next = newref->inode_list; + trace_btrfs_prelim_ref_merge(fs_info, ref, newref, + preftree->count); + /* + * A delayed ref can have newref->count < 0. + * The ref->count is updated to follow any + * BTRFS_[ADD|DROP]_DELAYED_REF actions. + */ + update_share_count(sc, ref->count, + ref->count + newref->count, newref); + ref->count += newref->count; + free_pref(newref); + return; } update_share_count(sc, 0, newref->count, newref); preftree->count++; trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count); - rb_link_node(&newref->rbnode, parent, p); - rb_insert_color_cached(&newref->rbnode, root, leftmost); } /* -- 2.51.0 From 0877597dc347169bed19e3e8282bbdce2593d16f Mon Sep 17 00:00:00 2001 From: "Roger L. Beckermeyer III" Date: Wed, 18 Dec 2024 08:28:53 +1030 Subject: [PATCH 13/16] btrfs: update __btrfs_add_delayed_item() to use rb helper Update __btrfs_add_delayed_item() to use rb_find_add_cached(). Signed-off-by: Roger L. Beckermeyer III Reviewed-by: Qu Wenruo Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 43 ++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 508bdbae29a0..60a6866a6cd9 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -366,40 +366,35 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item( return NULL; } +static int btrfs_delayed_item_cmp(const struct rb_node *new, + const struct rb_node *exist) +{ + const struct btrfs_delayed_item *new_item = + rb_entry(new, struct btrfs_delayed_item, rb_node); + const struct btrfs_delayed_item *exist_item = + rb_entry(exist, struct btrfs_delayed_item, rb_node); + + if (new_item->index < exist_item->index) + return -1; + if (new_item->index > exist_item->index) + return 1; + return 0; +} + static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, struct btrfs_delayed_item *ins) { - struct rb_node **p, *node; - struct rb_node *parent_node = NULL; struct rb_root_cached *root; - struct btrfs_delayed_item *item; - bool leftmost = true; + struct rb_node *exist; if (ins->type == BTRFS_DELAYED_INSERTION_ITEM) root = &delayed_node->ins_root; else root = &delayed_node->del_root; - p = &root->rb_root.rb_node; - node = &ins->rb_node; - - while (*p) { - parent_node = *p; - item = rb_entry(parent_node, struct btrfs_delayed_item, - rb_node); - - if (item->index < ins->index) { - p = &(*p)->rb_right; - leftmost = false; - } else if (item->index > ins->index) { - p = &(*p)->rb_left; - } else { - return -EEXIST; - } - } - - rb_link_node(node, parent_node, p); - rb_insert_color_cached(node, root, leftmost); + exist = rb_find_add_cached(&ins->rb_node, root, btrfs_delayed_item_cmp); + if (exist) + return -EEXIST; if (ins->type == BTRFS_DELAYED_INSERTION_ITEM && ins->index >= delayed_node->index_cnt) -- 2.51.0 From 287373c701e6d0ba93a7f21e979ed1bc25a02016 Mon Sep 17 00:00:00 2001 From: "Roger L. Beckermeyer III" Date: Wed, 18 Dec 2024 08:28:54 +1030 Subject: [PATCH 14/16] btrfs: update btrfs_add_chunk_map() to use rb helpers Update btrfs_add_chunk_map() to use rb_find_add_cached(). Signed-off-by: Roger L. Beckermeyer III Reviewed-by: Qu Wenruo Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d32913c51d69..c8b079ad1dfa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5514,33 +5514,34 @@ void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_ma btrfs_free_chunk_map(map); } +static int btrfs_chunk_map_cmp(const struct rb_node *new, + const struct rb_node *exist) +{ + const struct btrfs_chunk_map *new_map = + rb_entry(new, struct btrfs_chunk_map, rb_node); + const struct btrfs_chunk_map *exist_map = + rb_entry(exist, struct btrfs_chunk_map, rb_node); + + if (new_map->start == exist_map->start) + return 0; + if (new_map->start < exist_map->start) + return -1; + return 1; +} + EXPORT_FOR_TESTS int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map) { - struct rb_node **p; - struct rb_node *parent = NULL; - bool leftmost = true; + struct rb_node *exist; write_lock(&fs_info->mapping_tree_lock); - p = &fs_info->mapping_tree.rb_root.rb_node; - while (*p) { - struct btrfs_chunk_map *entry; - - parent = *p; - entry = rb_entry(parent, struct btrfs_chunk_map, rb_node); - - if (map->start < entry->start) { - p = &(*p)->rb_left; - } else if (map->start > entry->start) { - p = &(*p)->rb_right; - leftmost = false; - } else { - write_unlock(&fs_info->mapping_tree_lock); - return -EEXIST; - } + exist = rb_find_add_cached(&map->rb_node, &fs_info->mapping_tree, + btrfs_chunk_map_cmp); + + if (exist) { + write_unlock(&fs_info->mapping_tree_lock); + return -EEXIST; } - rb_link_node(&map->rb_node, parent, p); - rb_insert_color_cached(&map->rb_node, &fs_info->mapping_tree, leftmost); chunk_map_device_set_bits(map, CHUNK_ALLOCATED); chunk_map_device_clear_bits(map, CHUNK_TRIMMED); write_unlock(&fs_info->mapping_tree_lock); -- 2.51.0 From 4e4d058e21294d8062bab0285ed456f711793990 Mon Sep 17 00:00:00 2001 From: "Roger L. Beckermeyer III" Date: Wed, 18 Dec 2024 08:28:55 +1030 Subject: [PATCH 15/16] btrfs: update tree_insert() to use rb helpers Update tree_insert() to use rb_find_add_cached(). add cmp_refs_node in rb_find_add_cached() to compare. Since we're here, also make comp_data_refs() and comp_refs() accept both parameters as const. Signed-off-by: Roger L. Beckermeyer III Reviewed-by: Qu Wenruo Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 45 +++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 30f7079fa28e..98c5b61dabe8 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -268,8 +268,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, /* * compare two delayed data backrefs with same bytenr and type */ -static int comp_data_refs(struct btrfs_delayed_ref_node *ref1, - struct btrfs_delayed_ref_node *ref2) +static int comp_data_refs(const struct btrfs_delayed_ref_node *ref1, + const struct btrfs_delayed_ref_node *ref2) { if (ref1->data_ref.objectid < ref2->data_ref.objectid) return -1; @@ -282,8 +282,8 @@ static int comp_data_refs(struct btrfs_delayed_ref_node *ref1, return 0; } -static int comp_refs(struct btrfs_delayed_ref_node *ref1, - struct btrfs_delayed_ref_node *ref2, +static int comp_refs(const struct btrfs_delayed_ref_node *ref1, + const struct btrfs_delayed_ref_node *ref2, bool check_seq) { int ret = 0; @@ -317,34 +317,25 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1, return 0; } +static int cmp_refs_node(const struct rb_node *new, const struct rb_node *exist) +{ + const struct btrfs_delayed_ref_node *new_node = + rb_entry(new, struct btrfs_delayed_ref_node, ref_node); + const struct btrfs_delayed_ref_node *exist_node = + rb_entry(exist, struct btrfs_delayed_ref_node, ref_node); + + return comp_refs(new_node, exist_node, true); +} + static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root, struct btrfs_delayed_ref_node *ins) { - struct rb_node **p = &root->rb_root.rb_node; struct rb_node *node = &ins->ref_node; - struct rb_node *parent_node = NULL; - struct btrfs_delayed_ref_node *entry; - bool leftmost = true; - - while (*p) { - int comp; - - parent_node = *p; - entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, - ref_node); - comp = comp_refs(ins, entry, true); - if (comp < 0) { - p = &(*p)->rb_left; - } else if (comp > 0) { - p = &(*p)->rb_right; - leftmost = false; - } else { - return entry; - } - } + struct rb_node *exist; - rb_link_node(node, parent_node, p); - rb_insert_color_cached(node, root, leftmost); + exist = rb_find_add_cached(node, root, cmp_refs_node); + if (exist) + return rb_entry(exist, struct btrfs_delayed_ref_node, ref_node); return NULL; } -- 2.51.0 From 2a9bb78cfd367fdeff74f15b1e98969912292d9e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 21 Dec 2024 16:15:19 +1030 Subject: [PATCH 16/16] btrfs: validate system chunk array at btrfs_validate_super() Currently btrfs_validate_super() only does a very basic check on the array chunk size (not too large than the available space, but not too small to contain no chunk). The more comprehensive checks (the regular chunk checks and size check inside the system chunk array) are all done inside btrfs_read_sys_array(). It's not a big deal, but it also means we do not do any validation on the system chunk array at super block writeback time either. Do the following modification to centralize the system chunk array checks into btrfs_validate_super(): - Make chunk_err() helper accept stack chunk pointer If @leaf parameter is NULL, then the @chunk pointer will be a pointer to the chunk item, other than the offset inside the leaf. And since @leaf can be NULL, add a new @fs_info parameter for that case. - Make btrfs_check_chunk_valid() handle stack chunk pointer The same as chunk_err(), a new @fs_info parameter, and if @leaf is NULL, then @chunk will be a pointer to a stack chunk. If @chunk is NULL, then all needed btrfs_chunk members will be read using the stack helper instead of the leaf helper. This means we need to read out all the needed member at the beginning of the function. Furthermore, at super block read time, fs_info->sectorsize is not yet initialized, we need one extra @sectorsize parameter to grab the correct sectorsize. - Introduce a helper validate_sys_chunk_array() * Validate the disk key. * Validate the size before we access the full chunk items. * Do the full chunk item validation. - Call validate_sys_chunk_array() at btrfs_validate_super() - Simplify the checks inside btrfs_read_sys_array() Now the checks will be converted to an ASSERT(). - Simplify the checks inside read_one_chunk() Now that all chunk items inside system chunk array and chunk tree are verified, there is no need to verify them again inside read_one_chunk(). This change has the following advantages: - More comprehensive checks at write time And unlike the sys_chunk_array read routine, this time we do not need to allocate a dummy extent buffer to do the check. All the checks done here require no new memory allocation. - Slightly improved readability when iterating the system chunk array Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 67 ++++++++++++++++++++++++++++ fs/btrfs/tree-checker.c | 96 +++++++++++++++++++++++------------------ fs/btrfs/tree-checker.h | 7 ++- fs/btrfs/volumes.c | 73 ++++++------------------------- 4 files changed, 139 insertions(+), 104 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eff0dd1ae62f..04d68f253940 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2327,6 +2327,71 @@ out: return ret; } +static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info, + const struct btrfs_super_block *sb) +{ + unsigned int cur = 0; /* Offset inside the sys chunk array */ + /* + * At sb read time, fs_info is not fully initialized. Thus we have + * to use super block sectorsize, which should have been validated. + */ + const u32 sectorsize = btrfs_super_sectorsize(sb); + u32 sys_array_size = btrfs_super_sys_array_size(sb); + + if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { + btrfs_err(fs_info, "system chunk array too big %u > %u", + sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); + return -EUCLEAN; + } + + while (cur < sys_array_size) { + struct btrfs_disk_key *disk_key; + struct btrfs_chunk *chunk; + struct btrfs_key key; + u64 type; + u16 num_stripes; + u32 len; + int ret; + + disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur); + len = sizeof(*disk_key); + + if (cur + len > sys_array_size) + goto short_read; + cur += len; + + btrfs_disk_key_to_cpu(&key, disk_key); + if (key.type != BTRFS_CHUNK_ITEM_KEY) { + btrfs_err(fs_info, + "unexpected item type %u in sys_array at offset %u", + key.type, cur); + return -EUCLEAN; + } + chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur); + num_stripes = btrfs_stack_chunk_num_stripes(chunk); + if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size) + goto short_read; + type = btrfs_stack_chunk_type(chunk); + if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { + btrfs_err(fs_info, + "invalid chunk type %llu in sys_array at offset %u", + type, cur); + return -EUCLEAN; + } + ret = btrfs_check_chunk_valid(fs_info, NULL, chunk, key.offset, + sectorsize); + if (ret < 0) + return ret; + cur += btrfs_chunk_item_size(num_stripes); + } + return 0; +short_read: + btrfs_err(fs_info, + "super block sys chunk array short read, cur=%u sys_array_size=%u", + cur, sys_array_size); + return -EUCLEAN; +} + /* * Real super block validation * NOTE: super csum type and incompat features will not be checked here. @@ -2495,6 +2560,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info, ret = -EINVAL; } + ret = validate_sys_chunk_array(fs_info, sb); + /* * Obvious sys_chunk_array corruptions, it must hold at least one key * and one chunk diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index dfeee033f31f..43979891f7c8 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -764,22 +764,19 @@ static int check_block_group_item(struct extent_buffer *leaf, return 0; } -__printf(4, 5) +__printf(5, 6) __cold -static void chunk_err(const struct extent_buffer *leaf, +static void chunk_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *leaf, const struct btrfs_chunk *chunk, u64 logical, const char *fmt, ...) { - const struct btrfs_fs_info *fs_info = leaf->fs_info; - bool is_sb; + bool is_sb = !leaf; struct va_format vaf; va_list args; int i; int slot = -1; - /* Only superblock eb is able to have such small offset */ - is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET); - if (!is_sb) { /* * Get the slot number by iterating through all slots, this @@ -812,13 +809,17 @@ static void chunk_err(const struct extent_buffer *leaf, /* * The common chunk check which could also work on super block sys chunk array. * + * If @leaf is NULL, then @chunk must be an on-stack chunk item. + * (For superblock sys_chunk array, and fs_info->sectorsize is unreliable) + * * Return -EUCLEAN if anything is corrupted. * Return 0 if everything is OK. */ -int btrfs_check_chunk_valid(struct extent_buffer *leaf, - struct btrfs_chunk *chunk, u64 logical) +int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *leaf, + const struct btrfs_chunk *chunk, u64 logical, + u32 sectorsize) { - struct btrfs_fs_info *fs_info = leaf->fs_info; u64 length; u64 chunk_end; u64 stripe_len; @@ -826,63 +827,73 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, u16 sub_stripes; u64 type; u64 features; + u32 chunk_sector_size; bool mixed = false; int raid_index; int nparity; int ncopies; - length = btrfs_chunk_length(leaf, chunk); - stripe_len = btrfs_chunk_stripe_len(leaf, chunk); - num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); - type = btrfs_chunk_type(leaf, chunk); + if (leaf) { + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + chunk_sector_size = btrfs_chunk_sector_size(leaf, chunk); + } else { + length = btrfs_stack_chunk_length(chunk); + stripe_len = btrfs_stack_chunk_stripe_len(chunk); + num_stripes = btrfs_stack_chunk_num_stripes(chunk); + sub_stripes = btrfs_stack_chunk_sub_stripes(chunk); + type = btrfs_stack_chunk_type(chunk); + chunk_sector_size = btrfs_stack_chunk_sector_size(chunk); + } raid_index = btrfs_bg_flags_to_raid_index(type); ncopies = btrfs_raid_array[raid_index].ncopies; nparity = btrfs_raid_array[raid_index].nparity; if (unlikely(!num_stripes)) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk num_stripes, have %u", num_stripes); return -EUCLEAN; } if (unlikely(num_stripes < ncopies)) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk num_stripes < ncopies, have %u < %d", num_stripes, ncopies); return -EUCLEAN; } if (unlikely(nparity && num_stripes == nparity)) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk num_stripes == nparity, have %u == %d", num_stripes, nparity); return -EUCLEAN; } - if (unlikely(!IS_ALIGNED(logical, fs_info->sectorsize))) { - chunk_err(leaf, chunk, logical, + if (unlikely(!IS_ALIGNED(logical, sectorsize))) { + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk logical, have %llu should aligned to %u", - logical, fs_info->sectorsize); + logical, sectorsize); return -EUCLEAN; } - if (unlikely(btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize)) { - chunk_err(leaf, chunk, logical, + if (unlikely(chunk_sector_size != sectorsize)) { + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk sectorsize, have %u expect %u", - btrfs_chunk_sector_size(leaf, chunk), - fs_info->sectorsize); + chunk_sector_size, sectorsize); return -EUCLEAN; } - if (unlikely(!length || !IS_ALIGNED(length, fs_info->sectorsize))) { - chunk_err(leaf, chunk, logical, + if (unlikely(!length || !IS_ALIGNED(length, sectorsize))) { + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk length, have %llu", length); return -EUCLEAN; } if (unlikely(check_add_overflow(logical, length, &chunk_end))) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk logical start and length, have logical start %llu length %llu", logical, length); return -EUCLEAN; } if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk stripe length: %llu", stripe_len); return -EUCLEAN; @@ -896,30 +907,29 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, * Thus it should be a good way to catch obvious bitflips. */ if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "chunk length too large: have %llu limit %llu", length, btrfs_stripe_nr_to_offset(U32_MAX)); return -EUCLEAN; } if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK))) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "unrecognized chunk type: 0x%llx", ~(BTRFS_BLOCK_GROUP_TYPE_MASK | - BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)); + BTRFS_BLOCK_GROUP_PROFILE_MASK) & type); return -EUCLEAN; } if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set", type & BTRFS_BLOCK_GROUP_PROFILE_MASK); return -EUCLEAN; } if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", type, BTRFS_BLOCK_GROUP_TYPE_MASK); return -EUCLEAN; @@ -928,7 +938,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) && (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA)))) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "system chunk with data or metadata type: 0x%llx", type); return -EUCLEAN; @@ -941,7 +951,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, if (!mixed) { if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) && (type & BTRFS_BLOCK_GROUP_DATA))) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "mixed chunk type in non-mixed mode: 0x%llx", type); return -EUCLEAN; } @@ -963,7 +973,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) || ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) { - chunk_err(leaf, chunk, logical, + chunk_err(fs_info, leaf, chunk, logical, "invalid num_stripes:sub_stripes %u:%u for profile %llu", num_stripes, sub_stripes, type & BTRFS_BLOCK_GROUP_PROFILE_MASK); @@ -983,14 +993,15 @@ static int check_leaf_chunk_item(struct extent_buffer *leaf, struct btrfs_chunk *chunk, struct btrfs_key *key, int slot) { + struct btrfs_fs_info *fs_info = leaf->fs_info; int num_stripes; if (unlikely(btrfs_item_size(leaf, slot) < sizeof(struct btrfs_chunk))) { - chunk_err(leaf, chunk, key->offset, + chunk_err(fs_info, leaf, chunk, key->offset, "invalid chunk item size: have %u expect [%zu, %u)", btrfs_item_size(leaf, slot), sizeof(struct btrfs_chunk), - BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); + BTRFS_LEAF_DATA_SIZE(fs_info)); return -EUCLEAN; } @@ -1001,14 +1012,15 @@ static int check_leaf_chunk_item(struct extent_buffer *leaf, if (unlikely(btrfs_chunk_item_size(num_stripes) != btrfs_item_size(leaf, slot))) { - chunk_err(leaf, chunk, key->offset, + chunk_err(fs_info, leaf, chunk, key->offset, "invalid chunk item size: have %u expect %lu", btrfs_item_size(leaf, slot), btrfs_chunk_item_size(num_stripes)); return -EUCLEAN; } out: - return btrfs_check_chunk_valid(leaf, chunk, key->offset); + return btrfs_check_chunk_valid(fs_info, leaf, chunk, key->offset, + fs_info->sectorsize); } __printf(3, 4) diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index db67f96cbe4b..eb201f4ec3c7 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -10,6 +10,7 @@ #include struct extent_buffer; +struct btrfs_fs_info; struct btrfs_chunk; struct btrfs_key; @@ -66,8 +67,10 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node); int btrfs_check_leaf(struct extent_buffer *leaf); int btrfs_check_node(struct extent_buffer *node); -int btrfs_check_chunk_valid(struct extent_buffer *leaf, - struct btrfs_chunk *chunk, u64 logical); +int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *leaf, + const struct btrfs_chunk *chunk, u64 logical, + u32 sectorsize); int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner); int btrfs_verify_level_key(struct extent_buffer *eb, const struct btrfs_tree_parent_check *check); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c8b079ad1dfa..a58cf494b3d0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7004,16 +7004,6 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, warn_32bit_meta_chunk(fs_info, logical, length, type); #endif - /* - * Only need to verify chunk item if we're reading from sys chunk array, - * as chunk item in tree block is already verified by tree-checker. - */ - if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { - ret = btrfs_check_chunk_valid(leaf, chunk, logical); - if (ret) - return ret; - } - map = btrfs_find_chunk_map(fs_info, logical, 1); /* already mapped? */ @@ -7271,16 +7261,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) { struct btrfs_super_block *super_copy = fs_info->super_copy; struct extent_buffer *sb; - struct btrfs_disk_key *disk_key; - struct btrfs_chunk *chunk; u8 *array_ptr; unsigned long sb_array_offset; int ret = 0; - u32 num_stripes; u32 array_size; - u32 len = 0; u32 cur_offset; - u64 type; struct btrfs_key key; ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize); @@ -7303,10 +7288,15 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) cur_offset = 0; while (cur_offset < array_size) { - disk_key = (struct btrfs_disk_key *)array_ptr; - len = sizeof(*disk_key); - if (cur_offset + len > array_size) - goto out_short_read; + struct btrfs_chunk *chunk; + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)array_ptr; + u32 len = sizeof(*disk_key); + + /* + * The sys_chunk_array has been already verified at super block + * read time. Only do ASSERT()s for basic checks. + */ + ASSERT(cur_offset + len <= array_size); btrfs_disk_key_to_cpu(&key, disk_key); @@ -7314,44 +7304,14 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) sb_array_offset += len; cur_offset += len; - if (key.type != BTRFS_CHUNK_ITEM_KEY) { - btrfs_err(fs_info, - "unexpected item type %u in sys_array at offset %u", - (u32)key.type, cur_offset); - ret = -EIO; - break; - } + ASSERT(key.type == BTRFS_CHUNK_ITEM_KEY); chunk = (struct btrfs_chunk *)sb_array_offset; - /* - * At least one btrfs_chunk with one stripe must be present, - * exact stripe count check comes afterwards - */ - len = btrfs_chunk_item_size(1); - if (cur_offset + len > array_size) - goto out_short_read; - - num_stripes = btrfs_chunk_num_stripes(sb, chunk); - if (!num_stripes) { - btrfs_err(fs_info, - "invalid number of stripes %u in sys_array at offset %u", - num_stripes, cur_offset); - ret = -EIO; - break; - } + ASSERT(btrfs_chunk_type(sb, chunk) & BTRFS_BLOCK_GROUP_SYSTEM); - type = btrfs_chunk_type(sb, chunk); - if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { - btrfs_err(fs_info, - "invalid chunk type %llu in sys_array at offset %u", - type, cur_offset); - ret = -EIO; - break; - } + len = btrfs_chunk_item_size(btrfs_chunk_num_stripes(sb, chunk)); - len = btrfs_chunk_item_size(num_stripes); - if (cur_offset + len > array_size) - goto out_short_read; + ASSERT(cur_offset + len <= array_size); ret = read_one_chunk(&key, sb, chunk); if (ret) @@ -7364,13 +7324,6 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return ret; - -out_short_read: - btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u", - len, cur_offset); - clear_extent_buffer_uptodate(sb); - free_extent_buffer_stale(sb); - return -EIO; } /* -- 2.51.0