RESERVE_ALLOC_NO_ACCOUNT = 2,
 };
 
-static int update_block_group(struct btrfs_root *root,
-                             u64 bytenr, u64 num_bytes, int alloc);
+static int update_block_group(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root, u64 bytenr,
+                             u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
 
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
 {
        struct btrfs_block_group_cache *cache;
-       int err = 0;
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       int ret = 0;
        struct btrfs_path *path;
-       u64 last = 0;
+
+       if (list_empty(&cur_trans->dirty_bgs))
+               return 0;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-again:
-       while (1) {
-               cache = btrfs_lookup_first_block_group(root->fs_info, last);
-               while (cache) {
-                       if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-                               break;
-                       cache = next_block_group(root, cache);
-               }
-               if (!cache) {
-                       if (last == 0)
-                               break;
-                       last = 0;
-                       continue;
-               }
-               err = cache_save_setup(cache, trans, path);
-               last = cache->key.objectid + cache->key.offset;
-               btrfs_put_block_group(cache);
-       }
-
-       while (1) {
-               if (last == 0) {
-                       err = btrfs_run_delayed_refs(trans, root,
-                                                    (unsigned long)-1);
-                       if (err) /* File system offline */
-                               goto out;
-               }
-
-               cache = btrfs_lookup_first_block_group(root->fs_info, last);
-               while (cache) {
-                       if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
-                               btrfs_put_block_group(cache);
-                               goto again;
-                       }
-
-                       if (cache->dirty)
-                               break;
-                       cache = next_block_group(root, cache);
-               }
-               if (!cache) {
-                       if (last == 0)
-                               break;
-                       last = 0;
-                       continue;
-               }
-
-               if (cache->disk_cache_state == BTRFS_DC_SETUP)
-                       cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
-               cache->dirty = 0;
-               last = cache->key.objectid + cache->key.offset;
-
-               err = write_one_cache_group(trans, root, path, cache);
-               btrfs_put_block_group(cache);
-               if (err) /* File system offline */
-                       goto out;
-       }
-
-       while (1) {
-               /*
-                * I don't think this is needed since we're just marking our
-                * preallocated extent as written, but just in case it can't
-                * hurt.
-                */
-               if (last == 0) {
-                       err = btrfs_run_delayed_refs(trans, root,
-                                                    (unsigned long)-1);
-                       if (err) /* File system offline */
-                               goto out;
-               }
-
-               cache = btrfs_lookup_first_block_group(root->fs_info, last);
-               while (cache) {
-                       /*
-                        * Really this shouldn't happen, but it could if we
-                        * couldn't write the entire preallocated extent and
-                        * splitting the extent resulted in a new block.
-                        */
-                       if (cache->dirty) {
-                               btrfs_put_block_group(cache);
-                               goto again;
-                       }
-                       if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
-                               break;
-                       cache = next_block_group(root, cache);
-               }
-               if (!cache) {
-                       if (last == 0)
-                               break;
-                       last = 0;
-                       continue;
-               }
-
-               err = btrfs_write_out_cache(root, trans, cache, path);
-
-               /*
-                * If we didn't have an error then the cache state is still
-                * NEED_WRITE, so we can set it to WRITTEN.
-                */
-               if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
-                       cache->disk_cache_state = BTRFS_DC_WRITTEN;
-               last = cache->key.objectid + cache->key.offset;
+       /*
+        * We don't need the lock here since we are protected by the transaction
+        * commit.  We want to do the cache_save_setup first and then run the
+        * delayed refs to make sure we have the best chance at doing this all
+        * in one shot.
+        */
+       while (!list_empty(&cur_trans->dirty_bgs)) {
+               cache = list_first_entry(&cur_trans->dirty_bgs,
+                                        struct btrfs_block_group_cache,
+                                        dirty_list);
+               list_del_init(&cache->dirty_list);
+               if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                       cache_save_setup(cache, trans, path);
+               if (!ret)
+                       ret = btrfs_run_delayed_refs(trans, root,
+                                                    (unsigned long)-1);
+               if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
+                       btrfs_write_out_cache(root, trans, cache, path);
+               if (!ret)
+                       ret = write_one_cache_group(trans, root, path, cache);
                btrfs_put_block_group(cache);
        }
-out:
 
        btrfs_free_path(path);
-       return err;
+       return ret;
 }
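
The rewrite above is the heart of the patch: instead of rescanning every
block group up to three times per commit (cache setup, item write, cache
write-out), the function drains a per-transaction list holding only the
block groups that were actually dirtied.  Below is a minimal userspace
sketch of that drain pattern, assuming only C11; the list helpers mirror
<linux/list.h> and "struct bg" is a hypothetical stand-in for
struct btrfs_block_group_cache.

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }
static void list_add_tail(struct list_head *entry, struct list_head *h)
{
        entry->prev = h->prev;
        entry->next = h;
        h->prev->next = entry;
        h->prev = entry;
}
static void list_del_init(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
        INIT_LIST_HEAD(entry);
}
#define list_first_entry(h, type, member) \
        ((type *)((char *)(h)->next - offsetof(type, member)))

struct bg {                     /* hypothetical stand-in for a block group */
        long objectid;
        struct list_head dirty_list;
};

int main(void)
{
        struct list_head dirty_bgs;
        struct bg bgs[3] = { { 10 }, { 20 }, { 30 } };
        int i;

        INIT_LIST_HEAD(&dirty_bgs);
        for (i = 0; i < 3; i++) {
                INIT_LIST_HEAD(&bgs[i].dirty_list);
                /* queue at most once: an off-list node points at itself */
                if (list_empty(&bgs[i].dirty_list))
                        list_add_tail(&bgs[i].dirty_list, &dirty_bgs);
        }

        /* drain loop, shaped like the new btrfs_write_dirty_block_groups() */
        while (!list_empty(&dirty_bgs)) {
                struct bg *bg = list_first_entry(&dirty_bgs, struct bg,
                                                 dirty_list);

                list_del_init(&bg->dirty_list);
                printf("write block group %ld\n", bg->objectid);
        }
        return 0;
}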
 
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
        btrfs_free_reserved_data_space(inode, num_bytes);
 }
 
-static int update_block_group(struct btrfs_root *root,
-                             u64 bytenr, u64 num_bytes, int alloc)
+static int update_block_group(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root, u64 bytenr,
+                             u64 num_bytes, int alloc)
 {
        struct btrfs_block_group_cache *cache = NULL;
        struct btrfs_fs_info *info = root->fs_info;
                if (!alloc && cache->cached == BTRFS_CACHE_NO)
                        cache_block_group(cache, 1);
 
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+               if (list_empty(&cache->dirty_list)) {
+                       list_add_tail(&cache->dirty_list,
+                                     &trans->transaction->dirty_bgs);
+                       btrfs_get_block_group(cache);
+               }
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
 
                    cache->disk_cache_state < BTRFS_DC_CLEAR)
                        cache->disk_cache_state = BTRFS_DC_CLEAR;
 
-               cache->dirty = 1;
                old_val = btrfs_block_group_used(&cache->item);
                num_bytes = min(total, cache->key.offset - byte_in_group);
                if (alloc) {
                        }
                }
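
Two details of the update_block_group() hunk above are worth calling out:
the block group is queued on trans->transaction->dirty_bgs only if its
dirty_list is currently empty, so it is queued at most once per
transaction, and btrfs_get_block_group() gives the list its own reference
so the group cannot be freed while queued.  That is what allows the
per-group "dirty" flag to be deleted.  A hedged userspace sketch of the
"queue once, pin with a reference" idiom follows; the names mark_dirty,
write_out, and struct entry are hypothetical stand-ins for the btrfs ones.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct entry {
        atomic_int refs;        /* stands in for the block group refcount */
        int queued;             /* stands in for !list_empty(&dirty_list) */
};

static pthread_mutex_t dirty_lock = PTHREAD_MUTEX_INITIALIZER;

static void get_entry(struct entry *e) { atomic_fetch_add(&e->refs, 1); }
static void put_entry(struct entry *e)
{
        if (atomic_fetch_sub(&e->refs, 1) == 1)
                printf("last reference dropped, entry can be freed\n");
}

/* Dirtying side: enqueue at most once, pin the entry while queued. */
static void mark_dirty(struct entry *e)
{
        pthread_mutex_lock(&dirty_lock);
        if (!e->queued) {
                e->queued = 1;
                get_entry(e);           /* reference owned by the list */
        }
        pthread_mutex_unlock(&dirty_lock);
}

/* Write-out side: unqueue and drop the list's reference. */
static void write_out(struct entry *e)
{
        pthread_mutex_lock(&dirty_lock);
        e->queued = 0;
        pthread_mutex_unlock(&dirty_lock);
        put_entry(e);
}

int main(void)
{
        struct entry e = { .refs = 1, .queued = 0 };

        mark_dirty(&e);         /* first dirtying takes the extra ref */
        mark_dirty(&e);         /* second dirtying is a no-op */
        write_out(&e);          /* drops the list's reference */
        put_entry(&e);          /* drops the owner's reference: frees */
        return 0;
}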
 
-               ret = update_block_group(root, bytenr, num_bytes, 0);
+               ret = update_block_group(trans, root, bytenr, num_bytes, 0);
                if (ret) {
                        btrfs_abort_transaction(trans, extent_root, ret);
                        goto out;
        if (ret)
                return ret;
 
-       ret = update_block_group(root, ins->objectid, ins->offset, 1);
+       ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
                        ins->objectid, ins->offset);
                        return ret;
        }
 
-       ret = update_block_group(root, ins->objectid, root->nodesize, 1);
+       ret = update_block_group(trans, root, ins->objectid, root->nodesize,
+                                1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
                        ins->objectid, ins->offset);
        INIT_LIST_HEAD(&cache->cluster_list);
        INIT_LIST_HEAD(&cache->bg_list);
        INIT_LIST_HEAD(&cache->ro_list);
+       INIT_LIST_HEAD(&cache->dirty_list);
        btrfs_init_free_space_ctl(cache);
        atomic_set(&cache->trimming, 0);
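
The new INIT_LIST_HEAD(&cache->dirty_list) above is load-bearing: the
"already queued?" test in update_block_group() is list_empty() on the
entry itself, which is only meaningful once the node points at itself.
The same reasoning explains list_del_init() rather than list_del() in the
write-out and removal paths, so a group can be re-dirtied later.  A small
self-contained demo of that state machine, with list helpers mirroring
<linux/list.h> as in the earlier sketch:

#include <assert.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }
static void list_add_tail(struct list_head *entry, struct list_head *h)
{
        entry->prev = h->prev;
        entry->next = h;
        h->prev->next = entry;
        h->prev = entry;
}
static void list_del_init(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
        INIT_LIST_HEAD(entry);
}

int main(void)
{
        struct list_head dirty_bgs, node;

        INIT_LIST_HEAD(&dirty_bgs);
        INIT_LIST_HEAD(&node);          /* what this hunk adds per cache */

        assert(list_empty(&node));      /* "not queued" is detectable */
        list_add_tail(&node, &dirty_bgs);
        assert(!list_empty(&node));     /* "queued": update path skips it */

        list_del_init(&node);           /* del_init, not del... */
        assert(list_empty(&node));      /* ...so it can be queued again */
        return 0;
}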
 
                         * b) Setting 'dirty flag' makes sure that we flush
                         *    the new space cache info onto disk.
                         */
-                       cache->disk_cache_state = BTRFS_DC_CLEAR;
                        if (btrfs_test_opt(root, SPACE_CACHE))
-                               cache->dirty = 1;
+                               cache->disk_cache_state = BTRFS_DC_CLEAR;
                }
 
                read_extent_buffer(leaf, &cache->item,
                }
        }
 
+       spin_lock(&trans->transaction->dirty_bgs_lock);
+       if (!list_empty(&block_group->dirty_list)) {
+               list_del_init(&block_group->dirty_list);
+               btrfs_put_block_group(block_group);
+       }
+       spin_unlock(&trans->transaction->dirty_bgs_lock);
+
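
The hunk above handles removing a block group that was dirtied earlier in
the same transaction: before the group goes away it is unhooked from
dirty_bgs under dirty_bgs_lock, and the reference the list took in
update_block_group() is dropped, so commit never writes cache data for a
deleted group and the reference cannot leak.  A minimal sketch of that
teardown pairing, under the same hypothetical names as the
mark_dirty()/write_out() sketch earlier:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct entry {
        atomic_int refs;
        int queued;
};

static pthread_mutex_t dirty_lock = PTHREAD_MUTEX_INITIALIZER;

static void put_entry(struct entry *e)
{
        if (atomic_fetch_sub(&e->refs, 1) == 1)
                printf("freed\n");
}

/* Removal side: unhook from the dirty list first, if queued. */
static void remove_entry(struct entry *e)
{
        int was_queued;

        pthread_mutex_lock(&dirty_lock);
        was_queued = e->queued;
        e->queued = 0;
        pthread_mutex_unlock(&dirty_lock);

        if (was_queued)
                put_entry(e);   /* the dirty list's reference */
        put_entry(e);           /* the remover's own reference */
}

int main(void)
{
        struct entry e = { .refs = 2, .queued = 1 }; /* queued + one owner */

        remove_entry(&e);       /* prints "freed" exactly once */
        return 0;
}
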
        btrfs_remove_free_space_cache(block_group);
 
        spin_lock(&block_group->space_info->lock);
 
        INIT_LIST_HEAD(&cur_trans->pending_chunks);
        INIT_LIST_HEAD(&cur_trans->switch_commits);
        INIT_LIST_HEAD(&cur_trans->pending_ordered);
+       INIT_LIST_HEAD(&cur_trans->dirty_bgs);
+       spin_lock_init(&cur_trans->dirty_bgs_lock);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start &&
-                   old_root_used == btrfs_root_used(&root->root_item))
+                   old_root_used == btrfs_root_used(&root->root_item) &&
+                   (!extent_root ||
+                    list_empty(&trans->transaction->dirty_bgs)))
                        break;
 
                btrfs_set_root_node(&root->root_item, root->node);
                ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
                if (ret)
                        return ret;
+               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+               if (ret)
+                       return ret;
        }
 
        return 0;
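
The extra termination condition above exists because writing dirty block
groups updates the extent tree, and updating the extent tree can dirty
more block groups; for the extent root the loop must therefore run to a
fixed point where the root stops moving and dirty_bgs is empty.  A toy
model of that convergence, where each pass is assumed, purely for
illustration, to re-dirty half as many groups as it wrote:

#include <stdio.h>

int main(void)
{
        int dirty = 8;          /* pretend 8 block groups start out dirty */
        int passes = 0;

        /* keep going while the work itself generates more work */
        while (dirty) {
                int new_dirty = dirty / 2;      /* toy re-dirtying model */

                printf("pass %d: wrote %d, %d newly dirtied\n",
                       ++passes, dirty, new_dirty);
                dirty = new_dirty;
        }
        return 0;
}
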
        struct extent_buffer *eb;
        int ret;
 
-       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-       if (ret)
-               return ret;
-
        eb = btrfs_lock_root_node(fs_info->tree_root);
        ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
                              0, &eb);
        switch_commit_roots(cur_trans, root->fs_info);
 
        assert_qgroups_uptodate(trans);
+       ASSERT(list_empty(&cur_trans->dirty_bgs));
        update_super_roots(root);
 
        btrfs_set_super_log_root(root->fs_info->super_copy, 0);
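
The ASSERT added before update_super_roots() pins down the invariant the
whole patch relies on: by the time the commit roots are switched, every
dirtied block group must already have been written out, or its updated
item and space cache would be lost.  A toy model of checking that a drain
ran to completion, with assert() standing in for the kernel's ASSERT()
and a simplified singly linked list standing in for dirty_bgs:

#include <assert.h>

struct node { struct node *next; };

static struct node *dirty_head;         /* toy singly linked dirty list */

static void write_all_dirty(void)
{
        while (dirty_head)
                dirty_head = dirty_head->next;  /* "write" and unlink */
}

int main(void)
{
        struct node a = { 0 };
        struct node b = { &a };

        dirty_head = &b;
        write_all_dirty();
        assert(!dirty_head);    /* mirrors ASSERT(list_empty(&dirty_bgs)) */
        return 0;
}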