wc->reada_slot = slot;
 }
 
+static int account_leaf_items(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct extent_buffer *eb)
+{
+       int nr = btrfs_header_nritems(eb);
+       int i, extent_type, ret;
+       struct btrfs_key key;
+       struct btrfs_file_extent_item *fi;
+       u64 bytenr, num_bytes;
+
+       for (i = 0; i < nr; i++) {
+               btrfs_item_key_to_cpu(eb, &key, i);
+
+               if (key.type != BTRFS_EXTENT_DATA_KEY)
+                       continue;
+
+               fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
+               /* filter out non qgroup-accountable extents  */
+               extent_type = btrfs_file_extent_type(eb, fi);
+
+               if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+                       continue;
+
+               bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+               if (!bytenr)
+                       continue;
+
+               num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+
+               ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+                                             root->objectid,
+                                             bytenr, num_bytes,
+                                             BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+/*
+ * Walk up the tree from the bottom, freeing leaves and any interior
+ * nodes which have had all slots visited. If a node (leaf or
+ * interior) is freed, the node above it will have it's slot
+ * incremented. The root node will never be freed.
+ *
+ * At the end of this function, we should have a path which has all
+ * slots incremented to the next position for a search. If we need to
+ * read a new node it will be NULL and the node above it will have the
+ * correct slot selected for a later read.
+ *
+ * If we increment the root nodes slot counter past the number of
+ * elements, 1 is returned to signal completion of the search.
+ */
+static int adjust_slots_upwards(struct btrfs_root *root,
+                               struct btrfs_path *path, int root_level)
+{
+       int level = 0;
+       int nr, slot;
+       struct extent_buffer *eb;
+
+       if (root_level == 0)
+               return 1;
+
+       while (level <= root_level) {
+               eb = path->nodes[level];
+               nr = btrfs_header_nritems(eb);
+               path->slots[level]++;
+               slot = path->slots[level];
+               if (slot >= nr || level == 0) {
+                       /*
+                        * Don't free the root -  we will detect this
+                        * condition after our loop and return a
+                        * positive value for caller to stop walking the tree.
+                        */
+                       if (level != root_level) {
+                               btrfs_tree_unlock_rw(eb, path->locks[level]);
+                               path->locks[level] = 0;
+
+                               free_extent_buffer(eb);
+                               path->nodes[level] = NULL;
+                               path->slots[level] = 0;
+                       }
+               } else {
+                       /*
+                        * We have a valid slot to walk back down
+                        * from. Stop here so caller can process these
+                        * new nodes.
+                        */
+                       break;
+               }
+
+               level++;
+       }
+
+       eb = path->nodes[root_level];
+       if (path->slots[root_level] >= btrfs_header_nritems(eb))
+               return 1;
+
+       return 0;
+}
+
+/*
+ * root_eb is the subtree root and is locked before this function is called.
+ */
+static int account_shared_subtree(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+                                 struct extent_buffer *root_eb,
+                                 u64 root_gen,
+                                 int root_level)
+{
+       int ret = 0;
+       int level;
+       struct extent_buffer *eb = root_eb;
+       struct btrfs_path *path = NULL;
+
+       BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
+       BUG_ON(root_eb == NULL);
+
+       if (!root->fs_info->quota_enabled)
+               return 0;
+
+       if (!extent_buffer_uptodate(root_eb)) {
+               ret = btrfs_read_buffer(root_eb, root_gen);
+               if (ret)
+                       goto out;
+       }
+
+       if (root_level == 0) {
+               ret = account_leaf_items(trans, root, root_eb);
+               goto out;
+       }
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       /*
+        * Walk down the tree.  Missing extent blocks are filled in as
+        * we go. Metadata is accounted every time we read a new
+        * extent block.
+        *
+        * When we reach a leaf, we account for file extent items in it,
+        * walk back up the tree (adjusting slot pointers as we go)
+        * and restart the search process.
+        */
+       extent_buffer_get(root_eb); /* For path */
+       path->nodes[root_level] = root_eb;
+       path->slots[root_level] = 0;
+       path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
+walk_down:
+       level = root_level;
+       while (level >= 0) {
+               if (path->nodes[level] == NULL) {
+                       int child_bsize = root->nodesize;
+                       int parent_slot;
+                       u64 child_gen;
+                       u64 child_bytenr;
+
+                       /* We need to get child blockptr/gen from
+                        * parent before we can read it. */
+                       eb = path->nodes[level + 1];
+                       parent_slot = path->slots[level + 1];
+                       child_bytenr = btrfs_node_blockptr(eb, parent_slot);
+                       child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+
+                       eb = read_tree_block(root, child_bytenr, child_bsize,
+                                            child_gen);
+                       if (!eb || !extent_buffer_uptodate(eb)) {
+                               ret = -EIO;
+                               goto out;
+                       }
+
+                       path->nodes[level] = eb;
+                       path->slots[level] = 0;
+
+                       btrfs_tree_read_lock(eb);
+                       btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                       path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+
+                       ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+                                               root->objectid,
+                                               child_bytenr,
+                                               child_bsize,
+                                               BTRFS_QGROUP_OPER_SUB_SUBTREE,
+                                               0);
+                       if (ret)
+                               goto out;
+
+               }
+
+               if (level == 0) {
+                       ret = account_leaf_items(trans, root, path->nodes[level]);
+                       if (ret)
+                               goto out;
+
+                       /* Nonzero return here means we completed our search */
+                       ret = adjust_slots_upwards(root, path, root_level);
+                       if (ret)
+                               break;
+
+                       /* Restart search with new slots */
+                       goto walk_down;
+               }
+
+               level--;
+       }
+
+       ret = 0;
+out:
+       btrfs_free_path(path);
+
+       return ret;
+}
+
 /*
  * helper to process tree block while walking down the tree.
  *
        int level = wc->level;
        int reada = 0;
        int ret = 0;
+       bool need_account = false;
 
        generation = btrfs_node_ptr_generation(path->nodes[level],
                                               path->slots[level]);
 
        if (wc->stage == DROP_REFERENCE) {
                if (wc->refs[level - 1] > 1) {
+                       need_account = true;
                        if (level == 1 &&
                            (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
                                goto skip;
                        parent = 0;
                }
 
+               if (need_account) {
+                       ret = account_shared_subtree(trans, root, next,
+                                                    generation, level - 1);
+                       if (ret) {
+                               printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+                                       "%d accounting shared subtree. Quota "
+                                       "is out of sync, rescan required.\n",
+                                       root->fs_info->sb->s_id, ret);
+                       }
+               }
                ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
                                root->root_key.objectid, level - 1, 0, 0);
                BUG_ON(ret); /* -ENOMEM */
                        else
                                ret = btrfs_dec_ref(trans, root, eb, 0);
                        BUG_ON(ret); /* -ENOMEM */
+                       ret = account_leaf_items(trans, root, eb);
+                       if (ret) {
+                               printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+                                       "%d accounting leaf items. Quota "
+                                       "is out of sync, rescan required.\n",
+                                       root->fs_info->sb->s_id, ret);
+                       }
                }
                /* make block locked assertion in clean_tree_block happy */
                if (!path->locks[level] &&
        int level;
        bool root_dropped = false;
 
+       btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
+
        path = btrfs_alloc_path();
        if (!path) {
                err = -ENOMEM;
                                goto out_end_trans;
                        }
 
+                       /*
+                        * Qgroup update accounting is run from
+                        * delayed ref handling. This usually works
+                        * out because delayed refs are normally the
+                        * only way qgroup updates are added. However,
+                        * we may have added updates during our tree
+                        * walk so run qgroups here to make sure we
+                        * don't lose any updates.
+                        */
+                       ret = btrfs_delayed_qgroup_accounting(trans,
+                                                             root->fs_info);
+                       if (ret)
+                               printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+                                                  "running qgroup updates "
+                                                  "during snapshot delete. "
+                                                  "Quota is out of sync, "
+                                                  "rescan required.\n", ret);
+
                        btrfs_end_transaction_throttle(trans, tree_root);
                        if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
                                pr_debug("BTRFS: drop snapshot early exit\n");
        }
        root_dropped = true;
 out_end_trans:
+       ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
+       if (ret)
+               printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+                                  "running qgroup updates "
+                                  "during snapshot delete. "
+                                  "Quota is out of sync, "
+                                  "rescan required.\n", ret);
+
        btrfs_end_transaction_throttle(trans, tree_root);
 out_free:
        kfree(wc);
 
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
+
+static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
+                          struct btrfs_qgroup_operation *oper2)
+{
+       /*
+        * Ignore seq and type here, we're looking for any operation
+        * at all related to this extent on that root.
+        */
+       if (oper1->bytenr < oper2->bytenr)
+               return -1;
+       if (oper1->bytenr > oper2->bytenr)
+               return 1;
+       if (oper1->ref_root < oper2->ref_root)
+               return -1;
+       if (oper1->ref_root > oper2->ref_root)
+               return 1;
+       return 0;
+}
+
+static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
+                             struct btrfs_qgroup_operation *oper)
+{
+       struct rb_node *n;
+       struct btrfs_qgroup_operation *cur;
+       int cmp;
+
+       spin_lock(&fs_info->qgroup_op_lock);
+       n = fs_info->qgroup_op_tree.rb_node;
+       while (n) {
+               cur = rb_entry(n, struct btrfs_qgroup_operation, n);
+               cmp = comp_oper_exist(cur, oper);
+               if (cmp < 0) {
+                       n = n->rb_right;
+               } else if (cmp) {
+                       n = n->rb_left;
+               } else {
+                       spin_unlock(&fs_info->qgroup_op_lock);
+                       return -EEXIST;
+               }
+       }
+       spin_unlock(&fs_info->qgroup_op_lock);
+       return 0;
+}
+
 static int comp_oper(struct btrfs_qgroup_operation *oper1,
                     struct btrfs_qgroup_operation *oper2)
 {
        oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
        INIT_LIST_HEAD(&oper->elem.list);
        oper->elem.seq = 0;
+
+       if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
+               /*
+                * If any operation for this bytenr/ref_root combo
+                * exists, then we know it's not exclusively owned and
+                * shouldn't be queued up.
+                *
+                * This also catches the case where we have a cloned
+                * extent that gets queued up multiple times during
+                * drop snapshot.
+                */
+               if (qgroup_oper_exists(fs_info, oper)) {
+                       kfree(oper);
+                       return 0;
+               }
+       }
+
        ret = insert_qgroup_oper(fs_info, oper);
        if (ret) {
                /* Shouldn't happen so have an assert for developers */
        return ret;
 }
 
+/*
+ * Process a reference to a shared subtree. This type of operation is
+ * queued during snapshot removal when we encounter extents which are
+ * shared between more than one root.
+ */
+static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
+                                    struct btrfs_fs_info *fs_info,
+                                    struct btrfs_qgroup_operation *oper)
+{
+       struct ulist *roots = NULL;
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       struct btrfs_qgroup_list *glist;
+       struct ulist *parents;
+       int ret = 0;
+       struct btrfs_qgroup *qg;
+       u64 root_obj = 0;
+       struct seq_list elem = {};
+
+       parents = ulist_alloc(GFP_NOFS);
+       if (!parents)
+               return -ENOMEM;
+
+       btrfs_get_tree_mod_seq(fs_info, &elem);
+       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+                                  elem.seq, &roots);
+       btrfs_put_tree_mod_seq(fs_info, &elem);
+       if (ret < 0)
+               return ret;
+
+       if (roots->nnodes != 1)
+               goto out;
+
+       ULIST_ITER_INIT(&uiter);
+       unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
+       /*
+        * If we find our ref root then that means all refs
+        * this extent has to the root have not yet been
+        * deleted. In that case, we do nothing and let the
+        * last ref for this bytenr drive our update.
+        *
+        * This can happen for example if an extent is
+        * referenced multiple times in a snapshot (clone,
+        * etc). If we are in the middle of snapshot removal,
+        * queued updates for such an extent will find the
+        * root if we have not yet finished removing the
+        * snapshot.
+        */
+       if (unode->val == oper->ref_root)
+               goto out;
+
+       root_obj = unode->val;
+       BUG_ON(!root_obj);
+
+       spin_lock(&fs_info->qgroup_lock);
+       qg = find_qgroup_rb(fs_info, root_obj);
+       if (!qg)
+               goto out_unlock;
+
+       qg->excl += oper->num_bytes;
+       qg->excl_cmpr += oper->num_bytes;
+       qgroup_dirty(fs_info, qg);
+
+       /*
+        * Adjust counts for parent groups. First we find all
+        * parents, then in the 2nd loop we do the adjustment
+        * while adding parents of the parents to our ulist.
+        */
+       list_for_each_entry(glist, &qg->groups, next_group) {
+               ret = ulist_add(parents, glist->group->qgroupid,
+                               ptr_to_u64(glist->group), GFP_ATOMIC);
+               if (ret < 0)
+                       goto out_unlock;
+       }
+
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(parents, &uiter))) {
+               qg = u64_to_ptr(unode->aux);
+               qg->excl += oper->num_bytes;
+               qg->excl_cmpr += oper->num_bytes;
+               qgroup_dirty(fs_info, qg);
+
+               /* Add any parents of the parents */
+               list_for_each_entry(glist, &qg->groups, next_group) {
+                       ret = ulist_add(parents, glist->group->qgroupid,
+                                       ptr_to_u64(glist->group), GFP_ATOMIC);
+                       if (ret < 0)
+                               goto out_unlock;
+               }
+       }
+
+out_unlock:
+       spin_unlock(&fs_info->qgroup_lock);
+
+out:
+       ulist_free(roots);
+       ulist_free(parents);
+       return ret;
+}
+
 /*
  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
  * from the fs. First, all roots referencing the extent are searched, and
        case BTRFS_QGROUP_OPER_SUB_SHARED:
                ret = qgroup_shared_accounting(trans, fs_info, oper);
                break;
+       case BTRFS_QGROUP_OPER_SUB_SUBTREE:
+               ret = qgroup_subtree_accounting(trans, fs_info, oper);
+               break;
        default:
                ASSERT(0);
        }