atomic_t i_ioend_count; /* Number of outstanding io_end structs */
        /* current io_end structure for async DIO write */
        ext4_io_end_t *cur_aio_dio;
+       atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
 
        spinlock_t i_block_reservation_lock;
 
 
 #define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
 
+/* For ioend & aio unwritten conversion wait queues */
+#define EXT4_WQ_HASH_SZ                37
+#define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
+                                           EXT4_WQ_HASH_SZ])
+#define ext4_aio_mutex(v)  (&ext4__aio_mutex[((unsigned long)(v)) %\
+                                            EXT4_WQ_HASH_SZ])
+extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
 #endif /* __KERNEL__ */
 
 #endif /* _EXT4_H */
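
Both macros hash the inode pointer into one of EXT4_WQ_HASH_SZ shared slots, so unrelated inodes can land on the same wait queue or mutex; a collision costs only a spurious wakeup or some contention, never correctness. A minimal userspace sketch of the slot selection (hypothetical names; the kernel macros use unsigned long rather than uintptr_t):

/* Standalone sketch of the pointer-hash slot selection (hypothetical names) */
#include <stdint.h>
#include <stdio.h>

#define EXT4_WQ_HASH_SZ	37

/* Same arithmetic as ext4_ioend_wq()/ext4_aio_mutex(): pointer mod table size */
static unsigned int wq_slot(const void *inode)
{
	return (uintptr_t)inode % EXT4_WQ_HASH_SZ;
}

int main(void)
{
	int a, b;	/* stand-ins for two struct inode objects */

	printf("inode %p -> slot %u\n", (void *)&a, wq_slot(&a));
	printf("inode %p -> slot %u\n", (void *)&b, wq_slot(&b));
	return 0;
}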
 
                 * that this IO needs to be converted to written when the
                 * IO is completed
                 */
-               if (io)
+               if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
                        io->flag = EXT4_IO_END_UNWRITTEN;
-               else
+                       atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+               } else
                        ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
                if (ext4_should_dioread_nolock(inode))
                        map->m_flags |= EXT4_MAP_UNINIT;
                 * that we need to perform conversion when the IO is done.
                 */
                if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-                       if (io)
+                       if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
                                io->flag = EXT4_IO_END_UNWRITTEN;
-                       else
+                               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+                       } else
                                ext4_set_inode_state(inode,
                                                     EXT4_STATE_DIO_UNWRITTEN);
                }
 
        return 0;
 }
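
Both hunks above test the flag before bumping the counter so each io_end is counted at most once, no matter how many times the mapping path runs for it. A minimal standalone sketch of that guard (simplified and non-atomic, hypothetical names):

/* Sketch: count each io_end once by testing the flag first (hypothetical) */
#include <stdio.h>

#define IO_END_UNWRITTEN	0x1

struct io_end { unsigned int flag; };

static int pending;	/* analog of i_aiodio_unwritten */

static void mark_unwritten(struct io_end *io)
{
	if (!(io->flag & IO_END_UNWRITTEN)) {
		io->flag |= IO_END_UNWRITTEN;
		pending++;	/* atomic_inc() in the kernel */
	}
}

int main(void)
{
	struct io_end io = { 0 };

	mark_unwritten(&io);
	mark_unwritten(&io);			/* second call is a no-op */
	printf("pending = %d\n", pending);	/* prints 1, not 2 */
	return 0;
}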
 
+static void ext4_aiodio_wait(struct inode *inode)
+{
+       wait_queue_head_t *wq = ext4_ioend_wq(inode);
+
+       wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
+}
+
+/*
+ * This tests whether the IO in question is block-aligned or not.
+ * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
+ * are converted to written only after the IO is complete.  Until they are
+ * mapped, these blocks appear as holes, so dio_zero_block() will assume that
+ * it needs to zero out portions of the start and/or end block.  If 2 AIO
+ * threads are at work on the same unwritten block, they must be synchronized
+ * or one thread will zero the other's data, causing corruption.
+ */
+static int
+ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
+                  unsigned long nr_segs, loff_t pos)
+{
+       struct super_block *sb = inode->i_sb;
+       int blockmask = sb->s_blocksize - 1;
+       size_t count = iov_length(iov, nr_segs);
+       loff_t final_size = pos + count;
+
+       if (pos >= inode->i_size)
+               return 0;
+
+       if ((pos & blockmask) || (final_size & blockmask))
+               return 1;
+
+       return 0;
+}
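
As a worked example of the test above: with a 4096-byte block size, a write starting at offset 512, or one whose end lands mid-block, trips the mask check and must be serialized. A standalone sketch with hypothetical values:

/* Standalone sketch of the alignment check with a 4096-byte block size */
#include <stdio.h>

int main(void)
{
	long long blockmask = 4096 - 1;		/* sb->s_blocksize - 1 */
	long long pos = 512, count = 1024;	/* hypothetical AIO request */
	long long final_size = pos + count;

	/* 512 & 4095 != 0: the write starts mid-block, so it is unaligned */
	if ((pos & blockmask) || (final_size & blockmask))
		printf("unaligned: serialize against other AIO\n");
	else
		printf("aligned: no serialization needed\n");
	return 0;
}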
+
 static ssize_t
 ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                unsigned long nr_segs, loff_t pos)
 {
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+       int unaligned_aio = 0;
+       int ret;
 
        /*
         * If we have encountered a bitmap-format file, the size limit
                        nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
                                              sbi->s_bitmap_maxbytes - pos);
                }
+       } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
+                  !is_sync_kiocb(iocb))) {
+               unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
        }
 
-       return generic_file_aio_write(iocb, iov, nr_segs, pos);
+       /* Unaligned direct AIO must be serialized; see comment above */
+       if (unaligned_aio) {
+               static unsigned long unaligned_warn_time;
+
+               /* Warn about this once per day */
+               if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
+                       ext4_msg(inode->i_sb, KERN_WARNING,
+                                "Unaligned AIO/DIO on inode %ld by %s; "
+                                "performance will be poor.",
+                                inode->i_ino, current->comm);
+               mutex_lock(ext4_aio_mutex(inode));
+               ext4_aiodio_wait(inode);
+       }
+
+       ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+       if (unaligned_aio)
+               mutex_unlock(ext4_aio_mutex(inode));
+
+       return ret;
 }
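
The shape of the serialization is: take the hashed mutex so no second unaligned AIO can start, wait for in-flight conversions to drain, do the write, then release. A rough pthreads analog of that pattern (hypothetical names; note the kernel version uses the mutex only to serialize writers against each other, while wait_event()/wake_up_all() handle the drain separately):

/* Rough pthreads analog of the drain-then-write pattern (hypothetical names) */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t aio_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained   = PTHREAD_COND_INITIALIZER;
static int inflight;			/* analog of i_aiodio_unwritten */

/* Analog of the unwritten-conversion completion in ext4_end_io_nolock() */
static void conversion_done(void)
{
	pthread_mutex_lock(&aio_mutex);
	if (--inflight == 0)
		pthread_cond_broadcast(&drained);	/* wake_up_all() */
	pthread_mutex_unlock(&aio_mutex);
}

/* Analog of the unaligned path in ext4_file_write() */
static void unaligned_write(void)
{
	pthread_mutex_lock(&aio_mutex);		/* ext4_aio_mutex(inode) */
	while (inflight)			/* ext4_aiodio_wait(inode) */
		pthread_cond_wait(&drained, &aio_mutex);
	printf("write proceeds with no conversions pending\n");
	pthread_mutex_unlock(&aio_mutex);
}

int main(void)
{
	inflight = 1;		/* pretend one conversion is outstanding */
	conversion_done();	/* ... and that it completes */
	unaligned_write();	/* now the unaligned write may proceed */
	return 0;
}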
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
 
 
 static struct kmem_cache *io_page_cachep, *io_end_cachep;
 
-#define WQ_HASH_SZ             37
-#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
-static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
-
 int __init ext4_init_pageio(void)
 {
-       int i;
-
        io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
        if (io_page_cachep == NULL)
                return -ENOMEM;
                kmem_cache_destroy(io_page_cachep);
                return -ENOMEM;
        }
-       for (i = 0; i < WQ_HASH_SZ; i++)
-               init_waitqueue_head(&ioend_wq[i]);
-
        return 0;
 }
 
 
 void ext4_ioend_wait(struct inode *inode)
 {
-       wait_queue_head_t *wq = to_ioend_wq(inode);
+       wait_queue_head_t *wq = ext4_ioend_wq(inode);
 
        wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
 }
        for (i = 0; i < io->num_io_pages; i++)
                put_io_page(io->pages[i]);
        io->num_io_pages = 0;
-       wq = to_ioend_wq(io->inode);
+       wq = ext4_ioend_wq(io->inode);
        if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
            waitqueue_active(wq))
                wake_up_all(wq);
        struct inode *inode = io->inode;
        loff_t offset = io->offset;
        ssize_t size = io->size;
+       wait_queue_head_t *wq;
        int ret = 0;
 
        ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
        if (io->iocb)
                aio_complete(io->iocb, io->result, 0);
        /* clear the DIO AIO unwritten flag */
-       io->flag &= ~EXT4_IO_END_UNWRITTEN;
+       if (io->flag & EXT4_IO_END_UNWRITTEN) {
+               io->flag &= ~EXT4_IO_END_UNWRITTEN;
+               /* Wake up anyone waiting on unwritten extent conversion */
+               wq = ext4_ioend_wq(io->inode);
+               if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
+                   waitqueue_active(wq)) {
+                       wake_up_all(wq);
+               }
+       }
+
        return ret;
 }
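
The completion path above pairs atomic_dec_and_test() with waitqueue_active() so only the final conversion triggers a wakeup. A standalone C11 sketch of the dec-and-test semantics (hypothetical names):

/* Standalone C11 sketch of atomic_dec_and_test() semantics */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool dec_and_test(atomic_int *v)
{
	/* atomic_fetch_sub() returns the old value; old == 1 means we hit 0 */
	return atomic_fetch_sub(v, 1) == 1;
}

int main(void)
{
	atomic_int pending = 2;		/* two conversions in flight */

	printf("%d\n", dec_and_test(&pending));	/* 0: one still pending */
	printf("%d\n", dec_and_test(&pending));	/* 1: last one, wake waiters */
	return 0;
}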
 
 
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
        atomic_set(&ei->i_ioend_count, 0);
+       atomic_set(&ei->i_aiodio_unwritten, 0);
 
        return &ei->vfs_inode;
 }
        kfree(ext4_feat);
 }
 
+/* Shared across all ext4 file systems */
+wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
 static int __init ext4_init_fs(void)
 {
-       int err;
+       int i, err;
 
        ext4_check_flag_values();
+
+       for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
+               mutex_init(&ext4__aio_mutex[i]);
+               init_waitqueue_head(&ext4__ioend_wq[i]);
+       }
+
        err = ext4_init_pageio();
        if (err)
                return err;