unsigned long runtime_flags;
 
+       /* Keep track of who's O_SYNC/fsyncing currently */
+       atomic_t sync_writers;
+
        /* full 64 bit generation number, struct vfs_inode doesn't have a big
         * enough field for this.
         */
 
        ssize_t num_written = 0;
        ssize_t err = 0;
        size_t count, ocount;
+       bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 
        sb_start_write(inode->i_sb);
 
                }
        }
 
+       if (sync)
+               atomic_inc(&BTRFS_I(inode)->sync_writers);
+
        if (unlikely(file->f_flags & O_DIRECT)) {
                num_written = __btrfs_direct_write(iocb, iov, nr_segs,
                                                   pos, ppos, count, ocount);
                        num_written = err;
        }
 out:
+       if (sync)
+               atomic_dec(&BTRFS_I(inode)->sync_writers);
        sb_end_write(inode->i_sb);
        current->backing_dev_info = NULL;
        return num_written ? num_written : err;
         * out of the ->i_mutex. If so, we can flush the dirty pages by
         * multi-task, and make the performance up.
         */
+       atomic_inc(&BTRFS_I(inode)->sync_writers);
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+       atomic_dec(&BTRFS_I(inode)->sync_writers);
        if (ret)
                return ret;
 
 
        int ret = 0;
        int skip_sum;
        int metadata = 0;
+       int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
                                goto out;
                }
                goto mapit;
-       } else if (!skip_sum) {
+       } else if (async && !skip_sum) {
                /* csum items have already been cloned */
                if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
                        goto mapit;
                                   __btrfs_submit_bio_start,
                                   __btrfs_submit_bio_done);
                goto out;
+       } else if (!skip_sum) {
+               ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
+               if (ret)
+                       goto out;
        }
 
 mapit:
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
 
+       if (async_submit)
+               async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+
        bio_get(bio);
 
        if (!write) {
        extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
        ei->io_tree.track_uptodate = 1;
        ei->io_failure_tree.track_uptodate = 1;
+       atomic_set(&ei->sync_writers, 0);
        mutex_init(&ei->log_mutex);
        mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);