int ret;
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
-       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
+       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 1, nr_written);
-       else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
+       } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
-       else if (!btrfs_test_opt(root, COMPRESS) &&
-                !(BTRFS_I(inode)->force_compress) &&
-                !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
+       } else if (!btrfs_test_opt(root, COMPRESS) &&
+                  !(BTRFS_I(inode)->force_compress) &&
+                  !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
                ret = cow_file_range(inode, locked_page, start, end,
                                      page_started, nr_written, 1);
-       else
+       } else {
+               set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                       &BTRFS_I(inode)->runtime_flags);
                ret = cow_file_range_async(inode, locked_page, start, end,
                                           page_started, nr_written);
+       }
        return ret;
 }
 
 
        /* start IO across the range first to instantiate any delalloc
         * extents
         */
-       filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
+       filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+
+       /*
+        * So with compression we will find and lock a dirty page and clear the
+        * first one as dirty, setup an async extent, and immediately return
+        * with the entire range locked but with nobody actually marked with
+        * writeback.  So we can't just filemap_write_and_wait_range() and
+        * expect it to work since it will just kick off a thread to do the
+        * actual work.  So we need to call filemap_fdatawrite_range _again_
+        * since it will wait on the page lock, which won't be unlocked until
+        * after the pages have been marked as writeback and so we're good to go
+        * from there.  We have to do this otherwise we'll miss the ordered
+        * extents and that results in badness.  Please Josef, do not think you
+        * know better and pull this out at some point in the future, it is
+        * right and you are wrong.
+        */
+       if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                    &BTRFS_I(inode)->runtime_flags))
+               filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+
+       filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
        end = orig_end;
        found = 0;