www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
ext4: update i_disksize if direct write past ondisk size
author Eryu Guan <guaneryu@gmail.com>
Thu, 22 Mar 2018 15:44:59 +0000 (11:44 -0400)
committer Brian Maly <brian.maly@oracle.com>
Mon, 14 Jan 2019 17:29:31 +0000 (12:29 -0500)
Currently, in the ext4 direct write path, we update i_disksize only
when the new eof is greater than i_size, and don't update it when the
new eof is greater than i_disksize but less than i_size. This doesn't
work well with delalloc buffered writes, which update i_size and
i_disksize only when delalloc blocks are resolved (at writeback
time): the i_disksize from a direct write can be lost if a previous
buffered write succeeded at write time but failed at writeback time,
which then results in a corrupted on-disk inode size.

Consider this case: first, a buffered write puts 4k of data into a new
file at offset 16k with delayed allocation; then a direct write puts 4k
of data into the same file at offset 4k before the delalloc blocks are
resolved. The direct write doesn't update i_disksize because it writes
within i_size (20k), but its extent tree metadata has been committed to
the journal. Then writeback of the delalloc blocks fails (due to a
device error, etc.), and the i_size/i_disksize from the buffered write
can't be written to disk (it is still zero). A subsequent umount/mount
cycle recovers the journal and writes the extent tree metadata from the
direct write to disk, but with i_disksize being zero.

Fix it by also updating i_disksize in the direct write path when the
new eof is greater than i_disksize but less than i_size, so that
i_disksize is always consistent with direct writes.

This fixes occasional i_size corruption in fstests generic/475.

Signed-off-by: Eryu Guan <guaneryu@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Orabug: 28940828

commit 45d8ec4d9fd5468c08f2ef0b2b132bb62dc81a3d upstream

Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
fs/ext4/indirect.c
code line mismatch

Signed-off-by: Shan Hai <shan.hai@oracle.com>
Reviewed-by: Ashish Samant <ashish.samant@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
fs/ext4/indirect.c

index f5d37de17558be4a6a7d983905a5cc59c00d23a8..10fcbfd2ef85c0cfbd6facfc61b7b1c1586fee59 100644 (file)
@@ -647,6 +647,7 @@ ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       struct ext4_inode_info *ei = EXT4_I(inode);
        handle_t *handle;
        ssize_t ret;
        int orphan = 0;
@@ -656,7 +657,7 @@ ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        if (iov_iter_rw(iter) == WRITE) {
                loff_t final_size = offset + count;
 
-               if (final_size > inode->i_size) {
+               if (final_size > inode->i_size || final_size > ei->i_disksize) {
                        /* Credits for sb + inode write */
                        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
                        if (IS_ERR(handle)) {
@@ -736,9 +737,11 @@ locked:
                        ext4_orphan_del(handle, inode);
                if (ret > 0) {
                        loff_t end = offset + ret;
-                       if (end > inode->i_size) {
+                       if (end > inode->i_size || end > ei->i_disksize) {
                                ext4_update_i_disksize(inode, end);
-                               i_size_write(inode, end);
+                               if (end > inode->i_size)
+                                       i_size_write(inode, end);
+
                                /*
                                 * We're going to return a positive `ret'
                                 * here due to non-zero-length I/O, so there's