#include "tree-log.h"
 #include "locking.h"
 #include "compat.h"
+#include "volumes.h"
 
 /*
  * when auto defrag is enabled we
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root, struct inode *inode,
                         struct btrfs_path *path, u64 start, u64 end,
-                        int drop_cache)
+                        u64 *drop_end, int drop_cache)
 {
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
                        btrfs_abort_transaction(trans, root, ret);
        }
 
+       if (drop_end)
+               *drop_end = min(end, extent_end);
        btrfs_release_path(path);
        return ret;
 }
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
+       ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
                                   drop_cache);
        btrfs_free_path(path);
        return ret;
        return 0;
 }
 
+/*
+ * Decide whether the item at @slot of @leaf can absorb the byte range
+ * [@start, @end) of @inode.
+ *
+ * The item qualifies when it is a BTRFS_EXTENT_DATA_KEY item of @inode, of
+ * type BTRFS_FILE_EXTENT_REG, with a zero disk_bytenr, and it either begins
+ * at @end or finishes at @start.
+ *
+ * Returns 1 if the item qualifies and 0 otherwise, including when @slot does
+ * not index an item of @leaf.
+ */
+static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
+                         int slot, u64 start, u64 end)
+{
+       struct btrfs_file_extent_item *extent;
+       struct btrfs_key found;
+
+       /* the slot has to refer to an item that exists in this leaf */
+       if (slot < 0)
+               return 0;
+       if (slot >= btrfs_header_nritems(leaf))
+               return 0;
+
+       /* ... and that item has to be a file extent of this inode */
+       btrfs_item_key_to_cpu(leaf, &found, slot);
+       if (found.objectid != btrfs_ino(inode))
+               return 0;
+       if (found.type != BTRFS_EXTENT_DATA_KEY)
+               return 0;
+
+       extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+       /* only a regular extent with a zero disk_bytenr is accepted */
+       if (btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_REG ||
+           btrfs_file_extent_disk_bytenr(leaf, extent))
+               return 0;
+
+       /* bordering means: starts right at @end, or stops right at @start */
+       if (found.offset == end)
+               return 1;
+       return found.offset +
+              btrfs_file_extent_num_bytes(leaf, extent) == start;
+}
+
+/*
+ * Record the byte range [@offset, @end) of @inode as a hole, both in the
+ * inode's root tree and in the inode's extent map tree.
+ *
+ * Tree side: if hole_mergeable() accepts the item just before the range, or
+ * failing that the item just after it, that item is grown to cover the range.
+ * Otherwise a new file extent item of length @end - @offset is inserted.
+ *
+ * Extent map side: a mapping with block_start EXTENT_MAP_HOLE is added for the
+ * range.  Failures on this side are not returned to the caller; they set
+ * BTRFS_INODE_NEEDS_FULL_SYNC on the inode instead.
+ *
+ * @path is supplied by the caller and is released before returning, except
+ * when btrfs_search_slot() itself fails.
+ *
+ * Returns 0 on success, a negative errno from the search or the insertion, or
+ * -EINVAL when an item keyed exactly at @offset is unexpectedly present.
+ */
+static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
+                     struct btrfs_path *path, u64 offset, u64 end)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_buffer *leaf;
+       struct btrfs_file_extent_item *fi;
+       struct extent_map *hole_em;
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct btrfs_key key;
+       int ret;
+
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = offset;
+
+
+       ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+       if (ret < 0)
+               return ret;
+       if (ret == 0) {
+               /*
+                * An item keyed exactly at @offset is still in the tree.  The
+                * range is supposed to be empty when we get here, so report an
+                * error instead of taking the machine down, and let go of the
+                * leaf before handing control back.
+                */
+               btrfs_release_path(path);
+               return -EINVAL;
+       }
+
+       leaf = path->nodes[0];
+
+       /* try to extend the item in front of the range over it */
+       if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
+               u64 num_bytes;
+
+               path->slots[0]--;
+               fi = btrfs_item_ptr(leaf, path->slots[0],
+                                   struct btrfs_file_extent_item);
+               num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
+                       end - offset;
+               btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_offset(leaf, fi, 0);
+               btrfs_mark_buffer_dirty(leaf);
+               goto out;
+       }
+
+       /*
+        * The key was not found, so slots[0] is the position it would be
+        * inserted at, i.e. it already refers to the item following the
+        * range.  Try to pull that item's start back to @offset.
+        */
+       if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
+               u64 num_bytes;
+
+               key.offset = offset;
+               btrfs_set_item_key_safe(trans, root, path, &key);
+               fi = btrfs_item_ptr(leaf, path->slots[0],
+                                   struct btrfs_file_extent_item);
+               num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
+                       offset;
+               btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_offset(leaf, fi, 0);
+               btrfs_mark_buffer_dirty(leaf);
+               goto out;
+       }
+       btrfs_release_path(path);
+
+       /* nothing to merge with: the range gets an item of its own */
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+                                      0, 0, end - offset, 0, end - offset,
+                                      0, 0, 0);
+       if (ret)
+               return ret;
+
+out:
+       btrfs_release_path(path);
+
+       hole_em = alloc_extent_map();
+       if (!hole_em) {
+               /*
+                * No mapping could be allocated: throw away whatever is
+                * cached for the range and flag the inode for a full sync.
+                */
+               btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                       &BTRFS_I(inode)->runtime_flags);
+       } else {
+               hole_em->start = offset;
+               hole_em->len = end - offset;
+               hole_em->orig_start = offset;
+
+               hole_em->block_start = EXTENT_MAP_HOLE;
+               hole_em->block_len = 0;
+               hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+               hole_em->compress_type = BTRFS_COMPRESS_NONE;
+               hole_em->generation = trans->transid;
+
+               /*
+                * Drop the cached mappings for the range and add ours; go
+                * around again for as long as the add reports -EEXIST.
+                */
+               do {
+                       btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+                       write_lock(&em_tree->lock);
+                       ret = add_extent_mapping(em_tree, hole_em);
+                       if (!ret)
+                               list_move(&hole_em->list,
+                                         &em_tree->modified_extents);
+                       write_unlock(&em_tree->lock);
+               } while (ret == -EEXIST);
+               free_extent_map(hole_em);
+               if (ret)
+                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                               &BTRFS_I(inode)->runtime_flags);
+       }
+
+       return 0;
+}
+
+/*
+ * Punch a hole of @len bytes at @offset into @inode.
+ *
+ * The partial pages at either end of the range are zeroed through
+ * btrfs_truncate_page().  The whole sectors in between, [lockstart, lockend],
+ * have their extents dropped and are recorded as holes via fill_holes().
+ *
+ * Runs with i_mutex held for the whole operation, and with the extent range
+ * [lockstart, lockend] locked in the io_tree while extents are dropped.
+ *
+ * Returns 0 on success (also when @offset is at or past i_size, or when the
+ * range contains no whole sector) and a negative errno on failure.
+ */
+static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_state *cached_state = NULL;
+       struct btrfs_path *path;
+       struct btrfs_block_rsv *rsv;
+       struct btrfs_trans_handle *trans;
+       u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+       /* first sector boundary at or after @offset */
+       u64 lockstart = (offset + mask) & ~mask;
+       /* last byte before the final sector boundary at or before the end */
+       u64 lockend = ((offset + len) & ~mask) - 1;
+       u64 cur_offset = lockstart;
+       u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+       u64 drop_end;
+       unsigned long nr;
+       int ret = 0;
+       int err = 0;
+       bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
+               ((offset + len) >> PAGE_CACHE_SHIFT);
+
+       btrfs_wait_ordered_range(inode, offset, len);
+
+       mutex_lock(&inode->i_mutex);
+       /* a range starting at or past i_size needs no work */
+       if (offset >= inode->i_size) {
+               mutex_unlock(&inode->i_mutex);
+               return 0;
+       }
+
+       /*
+        * Only do this if we are in the same page and we aren't doing the
+        * entire page.
+        */
+       if (same_page && len < PAGE_CACHE_SIZE) {
+               ret = btrfs_truncate_page(inode, offset, len, 0);
+               mutex_unlock(&inode->i_mutex);
+               return ret;
+       }
+
+       /* zero back part of the first page */
+       ret = btrfs_truncate_page(inode, offset, 0, 0);
+       if (ret) {
+               mutex_unlock(&inode->i_mutex);
+               return ret;
+       }
+
+       /* zero the front end of the last page */
+       ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+       if (ret) {
+               mutex_unlock(&inode->i_mutex);
+               return ret;
+       }
+
+       /* no whole sector lies inside the range, the zeroing was all of it */
+       if (lockend < lockstart) {
+               mutex_unlock(&inode->i_mutex);
+               return 0;
+       }
+
+       while (1) {
+               struct btrfs_ordered_extent *ordered;
+
+               truncate_pagecache_range(inode, lockstart, lockend);
+
+               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                0, &cached_state);
+               ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+
+               /*
+                * We need to make sure we have no ordered extents in this range
+                * and nobody raced in and read a page in this range, if we did
+                * we need to try again.
+                *
+                * An ordered extent covers [file_offset, file_offset + len),
+                * so one that ends exactly at lockstart is outside the range.
+                */
+               if ((!ordered ||
+                   (ordered->file_offset + ordered->len <= lockstart ||
+                    ordered->file_offset > lockend)) &&
+                    !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                    lockend, EXTENT_UPTODATE, 0,
+                                    cached_state)) {
+                       if (ordered)
+                               btrfs_put_ordered_extent(ordered);
+                       break;
+               }
+               if (ordered)
+                       btrfs_put_ordered_extent(ordered);
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+                                    lockend, &cached_state, GFP_NOFS);
+               btrfs_wait_ordered_range(inode, lockstart,
+                                        lockend - lockstart + 1);
+       }
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       rsv = btrfs_alloc_block_rsv(root);
+       if (!rsv) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+       rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
+       /*
+        * failfast is set on this reservation; -ENOSPC from the drop below
+        * is handled as "restart with a new transaction", not as a failure.
+        */
+       rsv->failfast = 1;
+
+       /*
+        * 1 - update the inode
+        * 1 - removing the extents in the range
+        * 1 - adding the hole extent
+        */
+       trans = btrfs_start_transaction(root, 3);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto out_free;
+       }
+
+       ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+                                     min_size);
+       BUG_ON(ret);
+       trans->block_rsv = rsv;
+
+       while (cur_offset < lockend) {
+               ret = __btrfs_drop_extents(trans, root, inode, path,
+                                          cur_offset, lockend + 1,
+                                          &drop_end, 1);
+               /* done, or failed for a reason other than lack of space */
+               if (ret != -ENOSPC)
+                       break;
+
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
+
+               /* record what has been dropped so far as a hole */
+               ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
+
+               cur_offset = drop_end;
+
+               ret = btrfs_update_inode(trans, root, inode);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
+
+               /* close this transaction and carry on in a new one */
+               nr = trans->blocks_used;
+               btrfs_end_transaction(trans, root);
+               btrfs_btree_balance_dirty(root, nr);
+
+               trans = btrfs_start_transaction(root, 3);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       trans = NULL;
+                       break;
+               }
+
+               ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+                                             rsv, min_size);
+               BUG_ON(ret);    /* shouldn't happen */
+               trans->block_rsv = rsv;
+       }
+
+       if (ret) {
+               err = ret;
+               goto out_trans;
+       }
+
+       /* record the last piece that was dropped as a hole */
+       trans->block_rsv = &root->fs_info->trans_block_rsv;
+       ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+       if (ret) {
+               err = ret;
+               goto out_trans;
+       }
+
+out_trans:
+       /* trans is NULL when restarting the transaction failed in the loop */
+       if (!trans)
+               goto out_free;
+
+       trans->block_rsv = &root->fs_info->trans_block_rsv;
+       ret = btrfs_update_inode(trans, root, inode);
+       nr = trans->blocks_used;
+       btrfs_end_transaction(trans, root);
+       btrfs_btree_balance_dirty(root, nr);
+out_free:
+       btrfs_free_path(path);
+       btrfs_free_block_rsv(root, rsv);
+out:
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                            &cached_state, GFP_NOFS);
+       mutex_unlock(&inode->i_mutex);
+       /* an earlier error recorded in err wins over a later ret */
+       if (ret && !err)
+               err = ret;
+       return err;
+}
+
 static long btrfs_fallocate(struct file *file, int mode,
                            loff_t offset, loff_t len)
 {
        alloc_start = offset & ~mask;
        alloc_end =  (offset + len + mask) & ~mask;
 
-       /* We only support the FALLOC_FL_KEEP_SIZE mode */
-       if (mode & ~FALLOC_FL_KEEP_SIZE)
+       /* Make sure we aren't being given an unsupported mode */
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
 
+       if (mode & FALLOC_FL_PUNCH_HOLE)
+               return btrfs_punch_hole(inode, offset, len);
+
        /*
         * Make sure we have enough space before we do the
         * allocation.