www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
Revert "ocfs2: code clean up for direct io"
author: Wengang Wang <wen.gang.wang@oracle.com>
Tue, 23 Jan 2018 17:16:59 +0000 (09:16 -0800)
committer: Jack Vogel <jack.vogel@oracle.com>
Wed, 24 Jan 2018 22:43:54 +0000 (14:43 -0800)
This reverts commit 11fc5176778a82fb5bb0100413496e125862e649.

The reverted commit is one patch from a larger patch set; it was backported
on its own, without the rest of the set, and caused problems.

orabug: 27431376
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Ashish Samant <ashish.samant@oracle.com>
fs/ocfs2/file.c
fs/ocfs2/ocfs2_trace.h

index 79a9e2d4f8bf40dba1eaebbaa879fca31e7dfb8e..53e71d2e0958e7955eb4bce3db4dc71e06a7094d 100644 (file)
@@ -1405,6 +1405,44 @@ out:
        return ret;
 }
 
+/*
+ * Returns 1 if any cluster touched by [pos, pos + count) is a hole or an
+ * unwritten extent, 0 if none is, or the negative error from the lookup.
+ */
+static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
+                                      size_t count)
+{
+       int ret = 0;
+       unsigned int extent_flags;
+       u32 cpos, clusters, extent_len, phys_cpos;
+       struct super_block *sb = inode->i_sb;
+
+       cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
+       clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
+
+       while (clusters) {
+               ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
+                                        &extent_flags);
+               if (ret < 0) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+
+               if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
+                       ret = 1;
+                       break;
+               }
+
+               if (extent_len > clusters)
+                       extent_len = clusters;
+
+               clusters -= extent_len;
+               cpos += extent_len;
+       }
+out:
+       return ret;
+}
+
 static int ocfs2_write_remove_suid(struct inode *inode)
 {
        int ret;
@@ -2136,12 +2174,18 @@ out:
 
 static int ocfs2_prepare_inode_for_write(struct file *file,
                                         loff_t pos,
-                                        size_t count)
+                                        size_t count,
+                                        int appending,
+                                        int *direct_io,
+                                        int *has_refcount)
 {
        int ret = 0, meta_level = 0;
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = d_inode(dentry);
        loff_t end;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       int full_coherency = !(osb->s_mount_opt &
+               OCFS2_MOUNT_COHERENCY_BUFFERED);
 
        /*
         * We start with a read level meta lock and only jump to an ex
@@ -2190,6 +2234,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                                                               pos,
                                                               count,
                                                               &meta_level);
+                       if (has_refcount)
+                               *has_refcount = 1;
+                       if (direct_io)
+                               *direct_io = 0;
                }
 
                if (ret < 0) {
@@ -2197,12 +2245,67 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                        goto out_unlock;
                }
 
+               /*
+                * Skip the O_DIRECT checks if we don't need
+                * them.
+                */
+               if (!direct_io || !(*direct_io))
+                       break;
+
+               /*
+                * There's no sane way to do direct writes to an inode
+                * with inline data.
+                */
+               if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+                       *direct_io = 0;
+                       break;
+               }
+
+               /*
+                * Allowing concurrent direct writes means
+                * i_size changes wouldn't be synchronized, so
+                * one node could wind up truncating another
+                * nodes writes.
+                */
+               if (end > i_size_read(inode) && !full_coherency) {
+                       *direct_io = 0;
+                       break;
+               }
+
+               /*
+                * Fallback to old way if the feature bit is not set.
+                */
+               if (end > i_size_read(inode) &&
+                               !ocfs2_supports_append_dio(osb)) {
+                       *direct_io = 0;
+                       break;
+               }
+
+               /*
+                * We don't fill holes during direct io, so
+                * check for them here. If any are found, the
+                * caller will have to retake some cluster
+                * locks and initiate the io as buffered.
+                */
+               ret = ocfs2_check_range_for_holes(inode, pos, count);
+               if (ret == 1) {
+                       /*
+                        * Fallback to old way if the feature bit is not set.
+                        * Otherwise try dio first and then complete the rest
+                        * request through buffer io.
+                        */
+                       if (!ocfs2_supports_append_dio(osb))
+                               *direct_io = 0;
+                       ret = 0;
+               } else if (ret < 0)
+                       mlog_errno(ret);
                break;
        }
 
 out_unlock:
        trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
-                                           pos, count);
+                                           pos, appending, count,
+                                           direct_io, has_refcount);
 
        if (meta_level >= 0)
                ocfs2_inode_unlock(inode, meta_level);
@@ -2214,10 +2317,11 @@ out:
 static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
                                    struct iov_iter *from)
 {
-       int direct_io, rw_level, have_alloc_sem  = 0;
+       int direct_io, appending, rw_level, have_alloc_sem  = 0;
+       int can_do_direct, has_refcount = 0;
        ssize_t written = 0;
        ssize_t ret;
-       size_t count = iov_iter_count(from);
+       size_t count = iov_iter_count(from), orig_count;
        loff_t old_size;
        u32 old_clusters;
        struct file *file = iocb->ki_filp;
@@ -2226,6 +2330,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
        int full_coherency = !(osb->s_mount_opt &
                               OCFS2_MOUNT_COHERENCY_BUFFERED);
        int unaligned_dio = 0;
+       int dropped_dio = 0;
 
        trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
                (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2236,12 +2341,14 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
        if (count == 0)
                return 0;
 
+       appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
        direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
 
        mutex_lock(&inode->i_mutex);
 
        ocfs2_iocb_clear_sem_locked(iocb);
 
+relock:
        /* to match setattr's i_mutex -> rw_lock ordering */
        if (direct_io) {
                have_alloc_sem = 1;
@@ -2280,6 +2387,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
                ocfs2_inode_unlock(inode, 1);
        }
 
+       orig_count = iov_iter_count(from);
        ret = generic_write_checks(iocb, from);
        if (ret <= 0) {
                if (ret)
@@ -2288,7 +2396,9 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
        }
        count = ret;
 
-       ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
+       can_do_direct = direct_io;
+       ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
+                                           &can_do_direct, &has_refcount);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
@@ -2297,6 +2407,23 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
        if (direct_io && !is_sync_kiocb(iocb))
                unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos);
 
+       /*
+        * We can't complete the direct I/O as requested, fall back to
+        * buffered I/O.
+        */
+       if (direct_io && !can_do_direct) {
+               ocfs2_rw_unlock(inode, rw_level);
+
+               have_alloc_sem = 0;
+               rw_level = -1;
+
+               direct_io = 0;
+               iocb->ki_flags &= ~IOCB_DIRECT;
+               iov_iter_reexpand(from, orig_count);
+               dropped_dio = 1;
+               goto relock;
+       }
+
        if (unaligned_dio) {
                static unsigned long unaligned_warn_time;
                /*
@@ -2352,7 +2479,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
                goto no_sync;
 
        if (((file->f_flags & O_DSYNC) && !direct_io) ||
-           IS_SYNC(inode)) {
+           IS_SYNC(inode) || dropped_dio) {
                /*
                 * There is an performance issue when we are doing a flush with
                 * WB_SYNC_ALL flag. block_write_full_page() will transfer it
index 09d0c89a9daf963df206e45fc48cf8339d85064b..6cb019b7c6a83c4ec449baff12652e84b5fca06a 100644 (file)
@@ -1450,20 +1450,28 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
 
 TRACE_EVENT(ocfs2_prepare_inode_for_write,
        TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
-                unsigned long count),
-       TP_ARGS(ino, saved_pos, count),
+                int appending, unsigned long count,
+                int *direct_io, int *has_refcount),
+       TP_ARGS(ino, saved_pos, appending, count, direct_io, has_refcount),
        TP_STRUCT__entry(
                __field(unsigned long long, ino)
                __field(unsigned long long, saved_pos)
+               __field(int, appending)
                __field(unsigned long, count)
+               __field(int, direct_io)
+               __field(int, has_refcount)
        ),
        TP_fast_assign(
                __entry->ino = ino;
                __entry->saved_pos = saved_pos;
+               __entry->appending = appending;
                __entry->count = count;
+               __entry->direct_io = direct_io ? *direct_io : -1;
+               __entry->has_refcount = has_refcount ? *has_refcount : -1;
        ),
-       TP_printk("%llu %llu %lu", __entry->ino,
-                 __entry->saved_pos, __entry->count)
+       TP_printk("%llu %llu %d %lu %d %d", __entry->ino,
+                 __entry->saved_pos, __entry->appending, __entry->count,
+                 __entry->direct_io, __entry->has_refcount)
 );
 
 DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);