{
        loff_t offset = dio->iocb->ki_pos;
        ssize_t transferred = 0;
+       int err;
 
        /*
         * AIO submission can race with bio completion to get here while
        if (ret == 0)
                ret = transferred;
 
+       /*
+        * Try again to invalidate clean pages which might have been cached by
+        * non-direct readahead, or faulted in by get_user_pages() if the source
+        * of the write was an mmap'ed region of the file we're writing.  Either
+        * one is a pretty crazy thing to do, so we don't support it 100%.  If
+        * this invalidation fails, tough, the write still worked...
+        */
+       if (ret > 0 && dio->op == REQ_OP_WRITE &&
+           dio->inode->i_mapping->nrpages) {
+               err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+                                       offset >> PAGE_SHIFT,
+                                       (offset + ret - 1) >> PAGE_SHIFT);
+               WARN_ON_ONCE(err);
+       }
+
        if (dio->end_io) {
-               int err;
 
                // XXX: ki_pos??
                err = dio->end_io(dio->iocb, offset, ret, dio->private);
        struct dio *dio = bio->bi_private;
        unsigned long remaining;
        unsigned long flags;
+       bool defer_completion = false;
 
        /* cleanup the bio */
        dio_bio_complete(dio, bio);
        spin_unlock_irqrestore(&dio->bio_lock, flags);
 
        if (remaining == 0) {
-               if (dio->result && dio->defer_completion) {
+               /*
+                * Defer completion when defer_completion is set or
+                * when the inode has pages mapped and this is AIO write.
+                * We need to invalidate those pages because there is a
+                * chance they contain stale data in the case buffered IO
+                * went in between AIO submission and completion into the
+                * same region.
+                */
+               if (dio->result)
+                       defer_completion = dio->defer_completion ||
+                                          (dio->op == REQ_OP_WRITE &&
+                                           dio->inode->i_mapping->nrpages);
+               if (defer_completion) {
                        INIT_WORK(&dio->complete_work, dio_aio_complete_work);
                        queue_work(dio->inode->i_sb->s_dio_done_wq,
                                   &dio->complete_work);
         * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
         * so that we can call ->fsync.
         */
-       if (dio->is_async && iov_iter_rw(iter) == WRITE &&
-           ((iocb->ki_filp->f_flags & O_DSYNC) ||
-            IS_SYNC(iocb->ki_filp->f_mapping->host))) {
-               retval = dio_set_defer_completion(dio);
+       if (dio->is_async && iov_iter_rw(iter) == WRITE) {
+               retval = 0;
+               if ((iocb->ki_filp->f_flags & O_DSYNC) ||
+                   IS_SYNC(iocb->ki_filp->f_mapping->host))
+                       retval = dio_set_defer_completion(dio);
+               else if (!dio->inode->i_sb->s_dio_done_wq) {
+                       /*
+                        * In case of AIO write racing with buffered read we
+                        * need to defer completion. We can't decide this now,
+                        * however the workqueue needs to be initialized here.
+                        */
+                       retval = sb_init_dio_done_wq(dio->inode->i_sb);
+               }
                if (retval) {
                        /*
                         * We grab i_mutex only for reads so we don't have
 
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
        struct kiocb *iocb = dio->iocb;
+       struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;
 
+       /*
+        * Try again to invalidate clean pages which might have been cached by
+        * non-direct readahead, or faulted in by get_user_pages() if the source
+        * of the write was an mmap'ed region of the file we're writing.  Either
+        * one is a pretty crazy thing to do, so we don't support it 100%.  If
+        * this invalidation fails, tough, the write still worked...
+        */
+       if (!dio->error &&
+           (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+               ret = invalidate_inode_pages2_range(inode->i_mapping,
+                               iocb->ki_pos >> PAGE_SHIFT,
+                               (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+       }
+
        if (dio->end_io) {
                ret = dio->end_io(iocb,
                                dio->error ? dio->error : dio->size,
 
        ret = iomap_dio_complete(dio);
 
-       /*
-        * Try again to invalidate clean pages which might have been cached by
-        * non-direct readahead, or faulted in by get_user_pages() if the source
-        * of the write was an mmap'ed region of the file we're writing.  Either
-        * one is a pretty crazy thing to do, so we don't support it 100%.  If
-        * this invalidation fails, tough, the write still worked...
-        */
-       if (iov_iter_rw(iter) == WRITE) {
-               int err = invalidate_inode_pages2_range(mapping,
-                               start >> PAGE_SHIFT, end >> PAGE_SHIFT);
-               WARN_ON_ONCE(err);
-       }
-
        return ret;
 
 out_free_dio:
 
         * we're writing.  Either one is a pretty crazy thing to do,
         * so we don't support it 100%.  If this invalidation
         * fails, tough, the write still worked...
+        *
+        * Most of the time we do not need this since dio_complete() will do
+        * the invalidation for us. However there are some file systems that
+        * do not end up with dio_complete() being called, so let's not break
+        * them by removing it completely
         */
-       invalidate_inode_pages2_range(mapping,
-                               pos >> PAGE_SHIFT, end);
+       if (mapping->nrpages)
+               invalidate_inode_pages2_range(mapping,
+                                       pos >> PAGE_SHIFT, end);
 
        if (written > 0) {
                pos += written;