up_read(&root->fs_info->cleanup_work_sem);
 }
 
+/*
+ * Calculate the extra metadata reservation needed when snapshotting a
+ * subvolume that contains orphan files.
+ *
+ * @trans:            transaction handle; only trans->transid is read, to
+ *                    select which slot of block_rsv->freed[] to consult
+ * @pending:          pending snapshot; pending->root is the source subvolume
+ * @bytes_to_reserve: running total that is increased by the amount
+ *                    computed here
+ *
+ * Nothing is added when the source root has no orphan block reservation
+ * or its orphan list is empty.
+ */
+void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending,
+                               u64 *bytes_to_reserve)
+{
+       struct btrfs_root *root;
+       struct btrfs_block_rsv *block_rsv;
+       u64 num_bytes;
+       int index;
+
+       root = pending->root;
+       if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+               return;
+
+       block_rsv = root->orphan_block_rsv;
+
+       /* orphan block reservation for the snapshot */
+       num_bytes = block_rsv->size;
+
+       /*
+        * after the snapshot is created, COWing tree blocks may use more
+        * space than it frees. So we should make sure there is enough
+        * reserved space.
+        *
+        * The amount by which reserved + freed[index] falls short of the
+        * source reservation's size is added on top of num_bytes.
+        */
+       index = trans->transid & 0x1;
+       if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+               num_bytes += block_rsv->size -
+                            (block_rsv->reserved + block_rsv->freed[index]);
+       }
+
+       *bytes_to_reserve += num_bytes;
+}
+
+/*
+ * Distribute the space held in pending->block_rsv after a snapshot of a
+ * subvolume with orphan files has been created.
+ *
+ * @trans:   transaction handle; trans->transid selects the freed[] slot
+ * @pending: pending snapshot; pending->root is the source subvolume and
+ *           pending->snap is the new snapshot root
+ *
+ * Does nothing when the source root has no orphan block reservation or
+ * its orphan list is empty. Otherwise the source reservation's shortfall
+ * is migrated to it, and a new orphan block reservation is allocated for
+ * the snapshot and filled from pending->block_rsv. Allocation or
+ * migration failure triggers BUG_ON().
+ */
+void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending)
+{
+       struct btrfs_root *root = pending->root;
+       struct btrfs_root *snap = pending->snap;
+       struct btrfs_block_rsv *block_rsv;
+       u64 num_bytes;
+       int index;
+       int ret;
+
+       if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+               return;
+
+       /* refill source subvolume's orphan block reservation */
+       block_rsv = root->orphan_block_rsv;
+       index = trans->transid & 0x1;
+       if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+               /* move exactly the shortfall out of the pending reservation */
+               num_bytes = block_rsv->size -
+                           (block_rsv->reserved + block_rsv->freed[index]);
+               ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+                                             root->orphan_block_rsv,
+                                             num_bytes);
+               BUG_ON(ret);
+       }
+
+       /* setup orphan block reservation for the snapshot */
+       block_rsv = btrfs_alloc_block_rsv(snap);
+       BUG_ON(!block_rsv);
+
+       btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
+       snap->orphan_block_rsv = block_rsv;
+
+       /* the snapshot gets as many bytes as the source reservation's size */
+       num_bytes = root->orphan_block_rsv->size;
+       ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+                                     block_rsv, num_bytes);
+       BUG_ON(ret);
+
+       /* the section below is compiled out */
+#if 0
+       /* insert orphan item for the snapshot */
+       WARN_ON(!root->orphan_item_inserted);
+       ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+                                      snap->root_key.objectid);
+       BUG_ON(ret);
+       snap->orphan_item_inserted = 1;
+#endif
+}
+
+/*
+ * Values stored in root->orphan_cleanup_state. The orphan cleanup code
+ * moves the field from 0 to ORPHAN_CLEANUP_STARTED with cmpxchg() and
+ * sets ORPHAN_CLEANUP_DONE once it has finished;
+ * btrfs_orphan_commit_root() only acts in the DONE state.
+ */
+enum btrfs_orphan_cleanup_state {
+       ORPHAN_CLEANUP_STARTED  = 1,
+       ORPHAN_CLEANUP_DONE     = 2,
+};
+
+/*
+ * This is called at transaction commit time. If there are no orphan
+ * files in the subvolume and orphan cleanup has completed, it removes
+ * the subvolume's orphan item and frees the orphan block_rsv structure.
+ *
+ * @trans: transaction handle used to delete the orphan item
+ * @root:  subvolume root being committed
+ */
+void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root)
+{
+       int ret;
+
+       /* nothing to do while orphans remain or cleanup is not done */
+       if (!list_empty(&root->orphan_list) ||
+           root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
+               return;
+
+       /*
+        * the orphan item for this subvolume is keyed by the root's
+        * objectid in the tree root; it is only deleted while the root
+        * still has references
+        */
+       if (root->orphan_item_inserted &&
+           btrfs_root_refs(&root->root_item) > 0) {
+               ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
+                                           root->root_key.objectid);
+               BUG_ON(ret);
+               root->orphan_item_inserted = 0;
+       }
+
+       if (root->orphan_block_rsv) {
+               /* warn if the reservation still has a non-zero size */
+               WARN_ON(root->orphan_block_rsv->size > 0);
+               btrfs_free_block_rsv(root, root->orphan_block_rsv);
+               root->orphan_block_rsv = NULL;
+       }
+}
+
 /*
  * This creates an orphan entry for the given inode in case something goes
  * wrong in the middle of an unlink/truncate.
+ *
+ * NOTE: caller of this function should reserve 5 units of metadata for
+ *      this function.
+ *
+ * Always returns 0; a failed allocation, reservation or item insertion
+ * triggers BUG_ON() instead of being reported to the caller.
  */
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret = 0;
+       struct btrfs_block_rsv *block_rsv = NULL;
+       int reserve = 0;
+       int insert = 0;
+       int ret;
 
-       spin_lock(&root->list_lock);
+       /*
+        * allocate a candidate reservation before taking orphan_lock;
+        * whether it is actually needed is re-checked under the lock
+        */
+       if (!root->orphan_block_rsv) {
+               block_rsv = btrfs_alloc_block_rsv(root);
+               BUG_ON(!block_rsv);
+       }
 
-       /* already on the orphan list, we're good */
-       if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-               spin_unlock(&root->list_lock);
-               return 0;
+       spin_lock(&root->orphan_lock);
+       /*
+        * install our reservation if the root still has none; otherwise
+        * drop the one allocated above, leaving block_rsv NULL
+        */
+       if (!root->orphan_block_rsv) {
+               root->orphan_block_rsv = block_rsv;
+       } else if (block_rsv) {
+               btrfs_free_block_rsv(root, block_rsv);
+               block_rsv = NULL;
        }
 
-       list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+       if (list_empty(&BTRFS_I(inode)->i_orphan)) {
+               list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+#if 0
+               /*
+                * For proper ENOSPC handling, we should do orphan
+                * cleanup when mounting. But this introduces backward
+                * compatibility issue.
+                */
+               if (!xchg(&root->orphan_item_inserted, 1))
+                       insert = 2;
+               else
+                       insert = 1;
+#endif
+               /*
+                * with the block above compiled out, insert is never 2,
+                * so only the per-inode orphan item is inserted below
+                */
+               insert = 1;
+       } else {
+               WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
+       }
 
-       spin_unlock(&root->list_lock);
+       /* take the metadata reservation only once per inode */
+       if (!BTRFS_I(inode)->orphan_meta_reserved) {
+               BTRFS_I(inode)->orphan_meta_reserved = 1;
+               reserve = 1;
+       }
+       spin_unlock(&root->orphan_lock);
 
-       /*
-        * insert an orphan item to track this unlinked/truncated file
-        */
-       ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+       /* non-NULL only if this call installed the reservation on the root */
+       if (block_rsv)
+               btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
 
-       return ret;
+       /* grab metadata reservation from transaction handle */
+       if (reserve) {
+               ret = btrfs_orphan_reserve_metadata(trans, inode);
+               BUG_ON(ret);
+       }
+
+       /* insert an orphan item to track this unlinked/truncated file */
+       if (insert >= 1) {
+               ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+               BUG_ON(ret);
+       }
+
+       /* insert an orphan item recording that the subvolume has orphan files */
+       if (insert >= 2) {
+               ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+                                              root->root_key.objectid);
+               BUG_ON(ret);
+       }
+       return 0;
 }
 
 /*
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       int delete_item = 0;
+       int release_rsv = 0;
        int ret = 0;
 
-       spin_lock(&root->list_lock);
-
-       if (list_empty(&BTRFS_I(inode)->i_orphan)) {
-               spin_unlock(&root->list_lock);
-               return 0;
+       /*
+        * decide under orphan_lock what has to be undone; the actual
+        * item deletion and reservation release happen after unlocking
+        */
+       spin_lock(&root->orphan_lock);
+       if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+               list_del_init(&BTRFS_I(inode)->i_orphan);
+               delete_item = 1;
        }
 
-       list_del_init(&BTRFS_I(inode)->i_orphan);
-       if (!trans) {
-               spin_unlock(&root->list_lock);
-               return 0;
+       if (BTRFS_I(inode)->orphan_meta_reserved) {
+               BTRFS_I(inode)->orphan_meta_reserved = 0;
+               release_rsv = 1;
        }
+       spin_unlock(&root->orphan_lock);
 
-       spin_unlock(&root->list_lock);
+       /* without a transaction handle the on-disk orphan item is kept */
+       if (trans && delete_item) {
+               ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+               BUG_ON(ret);
+       }
 
-       ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+       /* the reservation is released whether or not trans was given */
+       if (release_rsv)
+               btrfs_orphan_release_metadata(inode);
 
-       return ret;
+       /* always 0: a failed item deletion hits BUG_ON() above */
+       return 0;
 }
 
 /*
        struct inode *inode;
        int ret = 0, nr_unlink = 0, nr_truncate = 0;
 
-       if (!xchg(&root->clean_orphans, 0))
+       if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
                return;
 
        path = btrfs_alloc_path();
                found_key.type = BTRFS_INODE_ITEM_KEY;
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
-               if (IS_ERR(inode))
-                       break;
+               BUG_ON(IS_ERR(inode));
 
                /*
                 * add this inode to the orphan list so btrfs_orphan_del does
                 * the proper thing when we hit it
                 */
-               spin_lock(&root->list_lock);
+               spin_lock(&root->orphan_lock);
                list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-               spin_unlock(&root->list_lock);
+               spin_unlock(&root->orphan_lock);
 
                /*
                 * if this is a bad inode, means we actually succeeded in
                /* this will do delete_inode and everything for us */
                iput(inode);
        }
+       btrfs_free_path(path);
+
+       root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
+
+       if (root->orphan_block_rsv)
+               btrfs_block_rsv_release(root, root->orphan_block_rsv,
+                                       (u64)-1);
+
+       if (root->orphan_block_rsv || root->orphan_item_inserted) {
+               trans = btrfs_join_transaction(root, 1);
+               btrfs_end_transaction(trans, root);
+       }
 
        if (nr_unlink)
                printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
        if (nr_truncate)
                printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
-
-       btrfs_free_path(path);
 }
 
 /*
        if (pending_del_nr) {
                ret = btrfs_del_items(trans, root, path, pending_del_slot,
                                      pending_del_nr);
+               BUG_ON(ret);
        }
        btrfs_free_path(path);
        return err;
                }
        }
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
        btrfs_set_trans_block_group(trans, inode);
 
        ret = btrfs_orphan_add(trans, inode);
                i_size_write(inode, attr->ia_size);
                btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
-               trans = btrfs_start_transaction(root, 1);
+               trans = btrfs_start_transaction(root, 0);
+               BUG_ON(IS_ERR(trans));
                btrfs_set_trans_block_group(trans, inode);
+               trans->block_rsv = root->orphan_block_rsv;
+               BUG_ON(!trans->block_rsv);
 
                ret = btrfs_update_inode(trans, root, inode);
                BUG_ON(ret);
        btrfs_i_size_write(inode, 0);
 
        while (1) {
-               trans = btrfs_start_transaction(root, 1);
+               trans = btrfs_start_transaction(root, 0);
+               BUG_ON(IS_ERR(trans));
                btrfs_set_trans_block_group(trans, inode);
-               ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
+               trans->block_rsv = root->orphan_block_rsv;
+
+               ret = btrfs_block_rsv_check(trans, root,
+                                           root->orphan_block_rsv, 0, 5);
+               if (ret) {
+                       BUG_ON(ret != -EAGAIN);
+                       ret = btrfs_commit_transaction(trans, root);
+                       BUG_ON(ret);
+                       continue;
+               }
 
+               ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
                if (ret != -EAGAIN)
                        break;
 
                btrfs_end_transaction(trans, root);
                trans = NULL;
                btrfs_btree_balance_dirty(root, nr);
+
        }
 
        if (ret == 0) {
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 0);
+       BUG_ON(IS_ERR(trans));
        btrfs_set_trans_block_group(trans, inode);
+       trans->block_rsv = root->orphan_block_rsv;
 
        /*
         * setattr is responsible for setting the ordered_data_close flag,
                btrfs_add_ordered_operation(trans, root, inode);
 
        while (1) {
+               if (!trans) {
+                       trans = btrfs_start_transaction(root, 0);
+                       BUG_ON(IS_ERR(trans));
+                       btrfs_set_trans_block_group(trans, inode);
+                       trans->block_rsv = root->orphan_block_rsv;
+               }
+
+               ret = btrfs_block_rsv_check(trans, root,
+                                           root->orphan_block_rsv, 0, 5);
+               if (ret) {
+                       BUG_ON(ret != -EAGAIN);
+                       ret = btrfs_commit_transaction(trans, root);
+                       BUG_ON(ret);
+                       trans = NULL;
+                       continue;
+               }
+
                ret = btrfs_truncate_inode_items(trans, root, inode,
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
 
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
+               trans = NULL;
                btrfs_btree_balance_dirty(root, nr);
-
-               trans = btrfs_start_transaction(root, 1);
-               btrfs_set_trans_block_group(trans, inode);
        }
 
        if (ret == 0 && inode->i_nlink > 0) {
        ei->reserved_extents = 0;
 
        ei->ordered_data_close = 0;
+       ei->orphan_meta_reserved = 0;
        ei->dummy_inode = 0;
        ei->force_compress = 0;
 
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
 
-       spin_lock(&root->list_lock);
+       spin_lock(&root->orphan_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
                printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
                       inode->i_ino);
                list_del_init(&BTRFS_I(inode)->i_orphan);
        }
-       spin_unlock(&root->list_lock);
+       spin_unlock(&root->orphan_lock);
 
        while (1) {
                ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);