struct list_head root_list;
 
+       spinlock_t log_extents_lock[2];
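+       /*
+        * Ordered extents pinned while a log transid is in flight, protected
+        * by the matching log_extents_lock above.
+        */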
+       struct list_head logged_list[2];
+
        spinlock_t orphan_lock;
        atomic_t orphan_inodes;
        struct btrfs_block_rsv *orphan_block_rsv;
 
 
        INIT_LIST_HEAD(&root->dirty_list);
        INIT_LIST_HEAD(&root->root_list);
+       INIT_LIST_HEAD(&root->logged_list[0]);
+       INIT_LIST_HEAD(&root->logged_list[1]);
        spin_lock_init(&root->orphan_lock);
        spin_lock_init(&root->inode_lock);
        spin_lock_init(&root->accounting_lock);
+       spin_lock_init(&root->log_extents_lock[0]);
+       spin_lock_init(&root->log_extents_lock[1]);
        mutex_init(&root->objectid_mutex);
        mutex_init(&root->log_mutex);
        init_waitqueue_head(&root->log_writer_wait);
 
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
        struct btrfs_trans_handle *trans;
+       bool full_sync = false;
 
        trace_btrfs_sync_file(file, datasync);
 
        /*
         * We write the dirty pages in the range and wait until they complete
         * out of the ->i_mutex. If so, we can flush the dirty pages by
-        * multi-task, and make the performance up.
+        * multiple tasks, which improves performance.  See
+        * btrfs_wait_ordered_range for an explanation of the ASYNC check.
         */
        atomic_inc(&BTRFS_I(inode)->sync_writers);
-       ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+       ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+       if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                            &BTRFS_I(inode)->runtime_flags))
+               ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
        atomic_dec(&BTRFS_I(inode)->sync_writers);
        if (ret)
                return ret;
         * range being left.
         */
        atomic_inc(&root->log_batch);
-       btrfs_wait_ordered_range(inode, start, end - start + 1);
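+       /*
+        * Only wait for all ordered extents here if this is a full sync;
+        * otherwise the tree-log code waits on the csums it needs, and we
+        * only wait on ordered extents if we end up committing the
+        * transaction instead.
+        */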
+       full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                            &BTRFS_I(inode)->runtime_flags);
+       if (full_sync)
+               btrfs_wait_ordered_range(inode, start, end - start + 1);
        atomic_inc(&root->log_batch);
 
        /*
 
        if (ret != BTRFS_NO_LOG_SYNC) {
                if (ret > 0) {
+                       /*
+                        * If we didn't already wait for ordered extents, we
+                        * need to do that now.
+                        */
+                       if (!full_sync)
+                               btrfs_wait_ordered_range(inode, start,
+                                                        end - start + 1);
                        ret = btrfs_commit_transaction(trans, root);
                } else {
                        ret = btrfs_sync_log(trans, root);
-                       if (ret == 0)
+                       if (ret == 0) {
                                ret = btrfs_end_transaction(trans, root);
-                       else
+                       } else {
+                               if (!full_sync)
+                                       btrfs_wait_ordered_range(inode, start,
+                                                                end -
+                                                                start + 1);
                                ret = btrfs_commit_transaction(trans, root);
+                       }
                }
        } else {
                ret = btrfs_end_transaction(trans, root);
 
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
                em->orig_start = em->start;
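+               /* brand new extent, the whole range counts as modified */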
+               em->mod_start = em->start;
+               em->mod_len = em->len;
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
                em->orig_start = em->start;
                ram_size = ins.offset;
                em->len = ins.offset;
+               em->mod_start = em->start;
+               em->mod_len = em->len;
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
                        em->block_start = disk_bytenr;
                        em->orig_block_len = disk_num_bytes;
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
+                       em->mod_start = em->start;
+                       em->mod_len = em->len;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
                        set_bit(EXTENT_FLAG_FILLING, &em->flags);
                        em->generation = -1;
 
        em->start = start;
        em->orig_start = orig_start;
+       em->mod_start = start;
+       em->mod_len = len;
        em->len = len;
        em->block_len = block_len;
        em->block_start = block_start;
 
        entry->file_offset = file_offset;
        entry->start = start;
        entry->len = len;
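+       /*
+        * NOCOW and NODATASUM ordered extents never get csums attached, so
+        * only track csum_bytes_left when we will actually add csums.
+        */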
+       if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
+           !(type == BTRFS_ORDERED_NOCOW))
+               entry->csum_bytes_left = disk_len;
        entry->disk_len = disk_len;
        entry->bytes_left = len;
        entry->inode = igrab(inode);
        INIT_LIST_HEAD(&entry->root_extent_list);
        INIT_LIST_HEAD(&entry->work_list);
        init_completion(&entry->completion);
+       INIT_LIST_HEAD(&entry->log_list);
 
        trace_btrfs_ordered_extent_add(inode, entry);
 
        tree = &BTRFS_I(inode)->ordered_tree;
        spin_lock_irq(&tree->lock);
        list_add_tail(&sum->list, &entry->list);
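+       /*
+        * Account the csums we just attached; once csum_bytes_left hits zero
+        * anybody waiting to log this ordered extent's csums can proceed.
+        */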
+       WARN_ON(entry->csum_bytes_left < sum->len);
+       entry->csum_bytes_left -= sum->len;
+       if (entry->csum_bytes_left == 0)
+               wake_up(&entry->wait);
        spin_unlock_irq(&tree->lock);
 }
 
        return ret == 0;
 }
 
+/* Must be called under a log transaction or while holding the log_mutex */
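+/*
+ * Take a reference on each of the inode's current ordered extents and put
+ * them on the log root's logged_list for this log transid so the log commit
+ * can wait on them.
+ */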
+void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode)
+{
+       struct btrfs_ordered_inode_tree *tree;
+       struct btrfs_ordered_extent *ordered;
+       struct rb_node *n;
+       int index = log->log_transid % 2;
+
+       tree = &BTRFS_I(inode)->ordered_tree;
+       spin_lock_irq(&tree->lock);
+       for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
+               ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
+               spin_lock(&log->log_extents_lock[index]);
+               if (list_empty(&ordered->log_list)) {
+                       list_add_tail(&ordered->log_list, &log->logged_list[index]);
+                       atomic_inc(&ordered->refs);
+               }
+               spin_unlock(&log->log_extents_lock[index]);
+       }
+       spin_unlock_irq(&tree->lock);
+}
+
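+/*
+ * Wait for BTRFS_ORDERED_IO_DONE on every ordered extent pinned for this log
+ * transid and drop our reference on each one.
+ */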
+void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
+{
+       struct btrfs_ordered_extent *ordered;
+       int index = transid % 2;
+
+       spin_lock_irq(&log->log_extents_lock[index]);
+       while (!list_empty(&log->logged_list[index])) {
+               ordered = list_first_entry(&log->logged_list[index],
+                                          struct btrfs_ordered_extent,
+                                          log_list);
+               list_del_init(&ordered->log_list);
+               spin_unlock_irq(&log->log_extents_lock[index]);
+               wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
+                                                  &ordered->flags));
+               btrfs_put_ordered_extent(ordered);
+               spin_lock_irq(&log->log_extents_lock[index]);
+       }
+       spin_unlock_irq(&log->log_extents_lock[index]);
+}
+
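+/*
+ * Drop the references on the pinned ordered extents without waiting for
+ * them, used when the log commit bails out or the log tree is freed.
+ */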
+void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
+{
+       struct btrfs_ordered_extent *ordered;
+       int index = transid % 2;
+
+       spin_lock_irq(&log->log_extents_lock[index]);
+       while (!list_empty(&log->logged_list[index])) {
+               ordered = list_first_entry(&log->logged_list[index],
+                                          struct btrfs_ordered_extent,
+                                          log_list);
+               list_del_init(&ordered->log_list);
+               spin_unlock_irq(&log->log_extents_lock[index]);
+               btrfs_put_ordered_extent(ordered);
+               spin_lock_irq(&log->log_extents_lock[index]);
+       }
+       spin_unlock_irq(&log->log_extents_lock[index]);
+}
+
 /*
  * used to drop a reference on an ordered extent.  This will free
  * the extent if the last reference is dropped
 
 #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent
                                       * has done its due diligence in updating
                                       * the isize. */
+#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this
+                                      ordered extent */
 
 struct btrfs_ordered_extent {
        /* logical offset in the file */
        /* number of bytes that still need writing */
        u64 bytes_left;
 
+       /* number of bytes that still need csumming */
+       u64 csum_bytes_left;
+
        /*
         * the end of the ordered extent which is behind it but
         * didn't update disk_i_size. Please see the comment of
        /* list of checksums for insertion when the extent io is done */
        struct list_head list;
 
+       /* entry on a log root's logged_list, for log commits to wait on */
+       struct list_head log_list;
+
        /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
        wait_queue_head_t wait;
 
                                 struct btrfs_root *root,
                                 struct inode *inode);
 void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
+void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
+void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
+void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
 int __init ordered_data_init(void);
 void ordered_data_exit(void);
 #endif
 
        unsigned long log_transid = 0;
 
        mutex_lock(&root->log_mutex);
+       log_transid = root->log_transid;
        index1 = root->log_transid % 2;
        if (atomic_read(&root->log_commit[index1])) {
                wait_log_commit(trans, root, root->log_transid);
        /* bail out if we need to do a full commit */
        if (root->fs_info->last_trans_log_full_commit == trans->transid) {
                ret = -EAGAIN;
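+               /* bailing to a full commit, drop the pinned ordered extents */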
+               btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&root->log_mutex);
                goto out;
        }
 
-       log_transid = root->log_transid;
        if (log_transid % 2 == 0)
                mark = EXTENT_DIRTY;
        else
        ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
+               btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&root->log_mutex);
                goto out;
        }
                }
                root->fs_info->last_trans_log_full_commit = trans->transid;
                btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+               btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
                ret = -EAGAIN;
                goto out;
                btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
                wait_log_commit(trans, log_root_tree,
                                log_root_tree->log_transid);
+               btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
                ret = 0;
                goto out;
         */
        if (root->fs_info->last_trans_log_full_commit == trans->transid) {
                btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+               btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
                ret = -EAGAIN;
                goto out_wake_log_root;
                                EXTENT_DIRTY | EXTENT_NEW);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
+               btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
                goto out_wake_log_root;
        }
        btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
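+       /*
+        * Wait for I/O on the pinned ordered extents to finish before we
+        * point the super at this log tree.
+        */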
+       btrfs_wait_logged_extents(log, log_transid);
 
        btrfs_set_super_log_root(root->fs_info->super_for_commit,
                                log_root_tree->node->start);
                                  EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
        }
 
+       /*
+        * We may have short-circuited the log tree with the full commit logic
+        * and left ordered extents on our list, so clear these out to keep us
+        * from leaking inodes and memory.
+        */
+       btrfs_free_logged_extents(log, 0);
+       btrfs_free_logged_extents(log, 1);
+
        free_extent_buffer(log->node);
        kfree(log);
 }
        struct btrfs_root *log = root->log_root;
        struct btrfs_file_extent_item *fi;
        struct extent_buffer *leaf;
+       struct btrfs_ordered_extent *ordered;
        struct list_head ordered_sums;
        struct btrfs_map_token token;
        struct btrfs_key key;
-       u64 csum_offset = em->mod_start - em->start;
-       u64 csum_len = em->mod_len;
+       u64 mod_start = em->mod_start;
+       u64 mod_len = em->mod_len;
+       u64 csum_offset;
+       u64 csum_len;
        u64 extent_offset = em->start - em->orig_start;
        u64 block_len;
        int ret;
+       int index = log->log_transid % 2;
        bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
        INIT_LIST_HEAD(&ordered_sums);
                csum_len = block_len;
        }
 
+       /*
+        * First check and see if our csums are on our outstanding ordered
+        * extents.
+        */
+again:
+       spin_lock_irq(&log->log_extents_lock[index]);
+       list_for_each_entry(ordered, &log->logged_list[index], log_list) {
+               struct btrfs_ordered_sum *sum;
+
+               if (!mod_len)
+                       break;
+
+               if (ordered->inode != inode)
+                       continue;
+
+               if (ordered->file_offset + ordered->len <= mod_start ||
+                   mod_start + mod_len <= ordered->file_offset)
+                       continue;
+
+               /*
+                * We are going to copy all the csums on this ordered extent, so
+                * go ahead and adjust mod_start and mod_len in case this
+                * ordered extent has already been logged.
+                */
+               if (ordered->file_offset > mod_start) {
+                       if (ordered->file_offset + ordered->len >=
+                           mod_start + mod_len)
+                               mod_len = ordered->file_offset - mod_start;
+                       /*
+                        * If we have this case
+                        *
+                        * |--------- logged extent ---------|
+                        *       |----- ordered extent ----|
+                        *
+                        * Just don't mess with mod_start and mod_len, we'll
+                        * just end up logging more csums than we need and it
+                        * will be ok.
+                        */
+               } else {
+                       if (ordered->file_offset + ordered->len <
+                           mod_start + mod_len) {
+                               mod_len = (mod_start + mod_len) -
+                                       (ordered->file_offset + ordered->len);
+                               mod_start = ordered->file_offset +
+                                       ordered->len;
+                       } else {
+                               mod_len = 0;
+                       }
+               }
+
+               /*
+                * Guard against looping forever on the above case of an
+                * ordered extent falling entirely inside the logged extent.
+                */
+               if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
+                                    &ordered->flags))
+                       continue;
+               atomic_inc(&ordered->refs);
+               spin_unlock_irq(&log->log_extents_lock[index]);
+               /*
+                * we've dropped the lock, we must either break or
+                * start over after this.
+                */
+
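+               /*
+                * Wait until btrfs_add_ordered_sum() has attached every csum
+                * for this ordered extent before we copy them into the log.
+                */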
+               wait_event(ordered->wait, ordered->csum_bytes_left == 0);
+
+               list_for_each_entry(sum, &ordered->list, list) {
+                       ret = btrfs_csum_file_blocks(trans, log, sum);
+                       if (ret) {
+                               btrfs_put_ordered_extent(ordered);
+                               goto unlocked;
+                       }
+               }
+               btrfs_put_ordered_extent(ordered);
+               goto again;
+
+       }
+       spin_unlock_irq(&log->log_extents_lock[index]);
+unlocked:
+
+       if (!mod_len || ret)
+               return ret;
+
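+       /*
+        * Look up csums for whatever part of the extent the pinned ordered
+        * extents above didn't cover.
+        */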
+       csum_offset = mod_start - em->start;
+       csum_len = mod_len;
+
        /* block start is already adjusted for the file extent offset. */
        ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
                                       em->block_start + csum_offset,
        struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
        u64 test_gen;
        int ret = 0;
+       int num = 0;
 
        INIT_LIST_HEAD(&extents);
 
 
        list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
                list_del_init(&em->list);
+
+               /*
+                * Just an arbitrary cap: this can get really CPU intensive
+                * once we start getting a lot of extents, and once we have
+                * that many extents we just want to commit since it will be
+                * faster.
+                */
+               if (++num > 32768) {
+                       list_del_init(&tree->modified_extents);
+                       ret = -EFBIG;
+                       goto process;
+               }
+
                if (em->generation <= test_gen)
                        continue;
                /* Need a ref to keep it from getting evicted from cache */
                atomic_inc(&em->refs);
                set_bit(EXTENT_FLAG_LOGGING, &em->flags);
                list_add_tail(&em->list, &extents);
        }
 
        list_sort(NULL, &extents, extent_cmp);
 
+process:
        while (!list_empty(&extents)) {
                em = list_entry(extents.next, struct extent_map, list);
 
 
        mutex_lock(&BTRFS_I(inode)->log_mutex);
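+       /*
+        * Pin the inode's current ordered extents so btrfs_sync_log() can
+        * wait on their csums and I/O.
+        */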
 
+       btrfs_get_logged_extents(log, inode);
+
        /*
         * a brute force approach to making sure we get the most uptodate
         * copies of everything.
        BTRFS_I(inode)->logged_trans = trans->transid;
        BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
 out_unlock:
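+       /*
+        * If we errored out nothing is going to wait on these ordered
+        * extents, so drop the references we took in
+        * btrfs_get_logged_extents().
+        */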
+       if (err)
+               btrfs_free_logged_extents(log, log->log_transid);
        mutex_unlock(&BTRFS_I(inode)->log_mutex);
 
        btrfs_free_path(path);
 end_trans:
        dput(old_parent);
        if (ret < 0) {
-               WARN_ON(ret != -ENOSPC);
                root->fs_info->last_trans_log_full_commit = trans->transid;
                ret = 1;
        }