* existing and update must have the same bytenr
  */
 static noinline void
-update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
+update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
+                        struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
 {
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;
+       int old_ref_mod;
 
        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);
         * only need the lock for this case cause we could be processing it
         * currently, for refs we just added we know we're a-ok.
         */
+       old_ref_mod = existing_ref->total_ref_mod;
        existing->ref_mod += update->ref_mod;
+       existing_ref->total_ref_mod += update->ref_mod;
+
+       /*
+        * If we are going from a positive ref mod to a negative or vice
+        * versa we need to make sure to adjust pending_csums accordingly.
+        */
+       if (existing_ref->is_data) {
+               if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
+                       delayed_refs->pending_csums -= existing->num_bytes;
+               if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
+                       delayed_refs->pending_csums += existing->num_bytes;
+       }
        spin_unlock(&existing_ref->lock);
 }
 
        head_ref->is_data = is_data;
        head_ref->ref_root = RB_ROOT;
        head_ref->processing = 0;
+       head_ref->total_ref_mod = count_mod;
 
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);
        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
-               update_existing_head_ref(&existing->node, ref);
+               update_existing_head_ref(delayed_refs, &existing->node, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
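+               /*
+                * Dropping a data extent means its csum items will also be
+                * deleted later, so account those bytes as pending.
+                */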
+               if (is_data && count_mod < 0)
+                       delayed_refs->pending_csums += num_bytes;
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
 
        struct rb_node href_node;
 
        struct btrfs_delayed_extent_op *extent_op;
+
+       /*
+        * This tracks the final ref_mod from all the refs associated with
+        * this head ref.  It is not adjusted as delayed refs are run; it is
+        * only used to decide whether we need to do the csum accounting.
+        */
+       int total_ref_mod;
+
        /*
         * when a new extent is allocated, it is just reserved in memory
         * The actual extent isn't inserted into the extent allocation tree
        /* total number of head nodes ready for processing */
        unsigned long num_heads_ready;
 
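+       /*
+        * Bytes of data extents queued for freeing whose csum items still
+        * need to be deleted, used when sizing delayed ref reservations.
+        */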
+       u64 pending_csums;
+
        /*
         * set when the tree is flushing before a transaction commit,
         * used by the throttling code to decide if new updates need
 
                 * list before we release it.
                 */
                if (btrfs_delayed_ref_is_head(ref)) {
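+                       /*
+                        * All of this head's refs have run, so if it was a
+                        * data extent being freed its csum deletion is no
+                        * longer pending.
+                        */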
+                       if (locked_ref->is_data &&
+                           locked_ref->total_ref_mod < 0) {
+                               spin_lock(&delayed_refs->lock);
+                               delayed_refs->pending_csums -= ref->num_bytes;
+                               spin_unlock(&delayed_refs->lock);
+                       }
                        btrfs_delayed_ref_unlock(locked_ref);
                        locked_ref = NULL;
                }
        return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
 }
 
+/*
+ * Takes the number of bytes to be checksummed and figures out how many
+ * leaves it would require to store the csums for that many bytes.
+ */
+static u64 csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
+{
+       u64 csum_size;
+       u64 num_csums_per_leaf;
+       u64 num_csums;
+
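+       /* max bytes of csum data a single item in a leaf can hold */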
+       csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+       num_csums_per_leaf = div64_u64(csum_size,
+                       (u64)btrfs_super_csum_size(root->fs_info->super_copy));
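+       /* one csum entry covers each sectorsize bytes of data */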
+       num_csums = div64_u64(csum_bytes, root->sectorsize);
+       num_csums += num_csums_per_leaf - 1;
+       num_csums = div64_u64(num_csums, num_csums_per_leaf);
+       return num_csums;
+}
+
 int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root)
 {
        struct btrfs_block_rsv *global_rsv;
        u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
+       u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
        u64 num_bytes;
        int ret = 0;
 
        if (num_heads > 1)
                num_bytes += (num_heads - 1) * root->nodesize;
        num_bytes <<= 1;
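+       /* plus one tree block per leaf of csums that may need to be deleted */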
+       num_bytes += csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
        global_rsv = &root->fs_info->global_block_rsv;
 
        /*
                                   int reserve)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 csum_size;
-       int num_csums_per_leaf;
-       int num_csums;
-       int old_csums;
+       u64 old_csums, num_csums;
 
        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
            BTRFS_I(inode)->csum_bytes == 0)
                return 0;
 
-       old_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
+       old_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
+
        if (reserve)
                BTRFS_I(inode)->csum_bytes += num_bytes;
        else
                BTRFS_I(inode)->csum_bytes -= num_bytes;
-       csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
-       num_csums_per_leaf = (int)div_u64(csum_size,
-                                           sizeof(struct btrfs_csum_item) +
-                                           sizeof(struct btrfs_disk_key));
-       num_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
-       num_csums = num_csums + num_csums_per_leaf - 1;
-       num_csums = num_csums / num_csums_per_leaf;
-
-       old_csums = old_csums + num_csums_per_leaf - 1;
-       old_csums = old_csums / num_csums_per_leaf;
+       num_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
 
        /* No change, no need to reserve more */
        if (old_csums == num_csums)
 
        int extent_type = -1;
        int ret;
        int err = 0;
-       int be_nice = 0;
        u64 ino = btrfs_ino(inode);
        u64 bytes_deleted = 0;
+       bool be_nice = false;
+       bool should_throttle = false;
 
        BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
 
                                                btrfs_header_owner(leaf),
                                                ino, extent_offset, 0);
                        BUG_ON(ret);
-                       if (be_nice && pending_del_nr &&
-                           (pending_del_nr % 16 == 0) &&
-                           bytes_deleted > 1024 * 1024) {
+                       if (btrfs_should_throttle_delayed_refs(trans, root))
                                btrfs_async_run_delayed_refs(root,
                                        trans->delayed_ref_updates * 2, 0);
-                       }
                }
 
                if (found_type == BTRFS_INODE_ITEM_KEY)
                        break;
 
+               should_throttle =
+                       btrfs_should_throttle_delayed_refs(trans, root);
+
                if (path->slots[0] == 0 ||
-                   path->slots[0] != pending_del_slot) {
+                   path->slots[0] != pending_del_slot ||
+                   (be_nice && should_throttle)) {
                        if (pending_del_nr) {
                                ret = btrfs_del_items(trans, root, path,
                                                pending_del_slot,
                                pending_del_nr = 0;
                        }
                        btrfs_release_path(path);
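+                       /*
+                        * The path has been released, so run the delayed
+                        * refs we have queued up to throttle this truncate
+                        * before searching again.
+                        */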
+                       if (be_nice && should_throttle) {
+                               unsigned long updates = trans->delayed_ref_updates;
+                               if (updates) {
+                                       trans->delayed_ref_updates = 0;
+                                       ret = btrfs_run_delayed_refs(trans, root, updates * 2);
+                                       if (ret && !err)
+                                               err = ret;
+                               }
+                       }
                        goto search_again;
                } else {
                        path->slots[0]--;
 
        btrfs_free_path(path);
 
-       if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+       if (be_nice && btrfs_should_throttle_delayed_refs(trans, root)) {
                unsigned long updates = trans->delayed_ref_updates;
                if (updates) {
                        trans->delayed_ref_updates = 0;
 
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
                WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
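+               /*
+                * All pending csum bytes should have been consumed by the
+                * time the transaction is freed; complain if any are left.
+                */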
+               if (transaction->delayed_refs.pending_csums)
+                       printk(KERN_ERR "BTRFS: pending csums is %llu\n",
+                              transaction->delayed_refs.pending_csums);
                while (!list_empty(&transaction->pending_chunks)) {
                        struct extent_map *em;
 
        cur_trans->delayed_refs.href_root = RB_ROOT;
        atomic_set(&cur_trans->delayed_refs.num_entries, 0);
        cur_trans->delayed_refs.num_heads_ready = 0;
+       cur_trans->delayed_refs.pending_csums = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;