trace_btrfs_space_reservation(fs_info, "delayed_item",
                                              item->key.objectid,
                                              num_bytes, 1);
-               item->bytes_reserved = num_bytes;
+               /*
+                * For insertions we track reserved metadata space by accounting
+                * for the number of leaves that will be used, based on the delayed
+                * node's index_item_leaves field.
+                */
+               if (item->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
+                       item->bytes_reserved = num_bytes;
        }
 
        return ret;
        btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
 }
 
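+/*
+ * Release the metadata space reserved for @num_leaves leaves from the delayed
+ * items block reserve. During log replay no space is reserved for delayed
+ * items, so this is a no-op in that case.
+ */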
+static void btrfs_delayed_item_release_leaves(struct btrfs_delayed_node *node,
+                                             unsigned int num_leaves)
+{
+       struct btrfs_fs_info *fs_info = node->root->fs_info;
+       const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, num_leaves);
+
+       /* There are no space reservations during log replay, bail out. */
+       if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+               return;
+
+       trace_btrfs_space_reservation(fs_info, "delayed_item", node->inode_id,
+                                     bytes, 0);
+       btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv, bytes, NULL);
+}
+
 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root,
                                      struct btrfs_path *path,
                                      struct btrfs_delayed_item *first_item)
 {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_delayed_node *node = first_item->delayed_node;
        LIST_HEAD(item_list);
        struct btrfs_delayed_item *curr;
        struct btrfs_delayed_item *next;
-       const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
+       const int max_size = BTRFS_LEAF_DATA_SIZE(fs_info);
        struct btrfs_item_batch batch;
        int total_size;
        char *ins_data = NULL;
        int ret;
 
+       lockdep_assert_held(&node->mutex);
+
+       /*
+        * For delayed items to insert, we track reserved metadata bytes based
+        * on the number of leaves that we will use.
+        * See btrfs_insert_delayed_dir_index() and
+        * btrfs_delayed_item_reserve_metadata().
+        */
+       ASSERT(first_item->bytes_reserved == 0);
+
        list_add_tail(&first_item->tree_list, &item_list);
        batch.total_data_size = first_item->data_len;
        batch.nr = 1;
                if (!next)
                        break;
 
+               ASSERT(next->bytes_reserved == 0);
+
                next_size = next->data_len + sizeof(struct btrfs_item);
                if (total_size + next_size > max_size)
                        break;
         */
        btrfs_release_path(path);
 
+       ASSERT(node->index_item_leaves > 0);
+
+       if (next) {
+               /*
+                * We inserted one batch of items into a leaf and there are more
+                * items to flush in a future batch, so release one unit of
+                * metadata space from the delayed block reserve, corresponding
+                * to the leaf we just flushed to.
+                */
+               btrfs_delayed_item_release_leaves(node, 1);
+               node->index_item_leaves--;
+       } else {
+               /*
+                * There are no more items to insert. We can have a number of
+                * reserved leaves > 1 here - this happens when many dir index
+                * items are added and then removed before they are flushed (file
+                * names with a very short life that never span a transaction). So
+                * release all remaining leaves.
+                */
+               btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
+               node->index_item_leaves = 0;
+       }
+
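+       /* Space was already released per leaf above, so just free the items. */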
        list_for_each_entry_safe(curr, next, &item_list, tree_list) {
                list_del(&curr->tree_list);
-               btrfs_delayed_item_release_metadata(root, curr);
                btrfs_release_delayed_item(curr);
        }
 out:
                                   struct btrfs_disk_key *disk_key, u8 type,
                                   u64 index)
 {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+       const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info);
        struct btrfs_delayed_node *delayed_node;
        struct btrfs_delayed_item *delayed_item;
        struct btrfs_dir_item *dir_item;
+       bool reserve_leaf_space;
+       u32 data_len;
        int ret;
 
        delayed_node = btrfs_get_or_create_delayed_node(dir);
        btrfs_set_stack_dir_type(dir_item, type);
        memcpy((char *)(dir_item + 1), name, name_len);
 
-       ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
-       /*
-        * Space was reserved for a dir index item insertion when we started the
-        * transaction, so getting a failure here should be impossible.
-        */
-       if (WARN_ON(ret)) {
-               btrfs_release_delayed_item(delayed_item);
-               goto release_node;
-       }
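+       /* An index item consumes its data plus one item header in a leaf. */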
+       data_len = delayed_item->data_len + sizeof(struct btrfs_item);
 
        mutex_lock(&delayed_node->mutex);
+
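+       /*
+        * If this item does not fit in the current leaf-sized batch, start a
+        * new batch, which requires reserving space for one more leaf.
+        */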
+       if (delayed_node->index_item_leaves == 0 ||
+           delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
+               delayed_node->curr_index_batch_size = data_len;
+               reserve_leaf_space = true;
+       } else {
+               delayed_node->curr_index_batch_size += data_len;
+               reserve_leaf_space = false;
+       }
+
+       if (reserve_leaf_space) {
+               ret = btrfs_delayed_item_reserve_metadata(trans, dir->root,
+                                                         delayed_item);
+               /*
+                * Space was reserved for a dir index item insertion when we
+                * started the transaction, so getting a failure here should be
+                * impossible.
+                */
+               if (WARN_ON(ret)) {
+                       mutex_unlock(&delayed_node->mutex);
+                       btrfs_release_delayed_item(delayed_item);
+                       goto release_node;
+               }
+
+               delayed_node->index_item_leaves++;
+       } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
+               const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+
+               /*
+                * Adding the new dir index item does not require touching another
+                * leaf, so we can release one unit of metadata space that was
+                * previously reserved when starting the transaction. This applies
+                * only when the transaction was started, not joined (as happens
+                * when replaying log trees).
+                */
+               trace_btrfs_space_reservation(fs_info, "transaction",
+                                             trans->transid, bytes, 0);
+               btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
+               ASSERT(trans->bytes_reserved >= bytes);
+               trans->bytes_reserved -= bytes;
+       }
+
        ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
        if (unlikely(ret)) {
                btrfs_err(trans->fs_info,
                return 1;
        }
 
-       btrfs_delayed_item_release_metadata(node->root, item);
+       /*
+        * For delayed items to insert, we track reserved metadata bytes based
+        * on the number of leaves that we will use.
+        * See btrfs_insert_delayed_dir_index() and
+        * btrfs_delayed_item_reserve_metadata().
+        */
+       ASSERT(item->bytes_reserved == 0);
+       ASSERT(node->index_item_leaves > 0);
+
+       /*
+        * If there's only one leaf reserved, we can subtract this item's size
+        * from the current batch size; otherwise we cannot, because we don't
+        * know which leaf the item belongs to. With the current limit on
+        * delayed items, we rarely accumulate enough dir index items to fill
+        * more than one leaf (even when using a leaf size of 4K).
+        */
+       if (node->index_item_leaves == 1) {
+               const u32 data_len = item->data_len + sizeof(struct btrfs_item);
+
+               ASSERT(node->curr_index_batch_size >= data_len);
+               node->curr_index_batch_size -= data_len;
+       }
+
        btrfs_release_delayed_item(item);
+
+       /* If we now have no more dir index items, we can release all leaves. */
+       if (RB_EMPTY_ROOT(&node->ins_root.rb_root)) {
+               btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
+               node->index_item_leaves = 0;
+       }
+
        mutex_unlock(&node->mutex);
        return 0;
 }
        mutex_lock(&delayed_node->mutex);
        curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
        while (curr_item) {
-               btrfs_delayed_item_release_metadata(root, curr_item);
                prev_item = curr_item;
                curr_item = __btrfs_next_delayed_item(prev_item);
                btrfs_release_delayed_item(prev_item);
        }
 
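+       /*
+        * All pending dir index insertions were dropped above, so release all
+        * the leaf space that was reserved for them.
+        */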
+       if (delayed_node->index_item_leaves > 0) {
+               btrfs_delayed_item_release_leaves(delayed_node,
+                                                 delayed_node->index_item_leaves);
+               delayed_node->index_item_leaves = 0;
+       }
+
        curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
        while (curr_item) {
                btrfs_delayed_item_release_metadata(root, curr_item);