ocfs2: serialize unaligned aio

author Mark Fasheh <mfasheh@suse.com>

Mon, 15 Aug 2011 17:08:44 +0000 (10:08 -0700)

committer Maxim Uvarov <maxim.uvarov@oracle.com>

Thu, 22 Mar 2012 00:17:23 +0000 (17:17 -0700)
author Mark Fasheh <mfasheh@suse.com>
Mon, 15 Aug 2011 17:08:44 +0000 (10:08 -0700)
committer Maxim Uvarov <maxim.uvarov@oracle.com>
Thu, 22 Mar 2012 00:17:23 +0000 (17:17 -0700)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c

index cdd91f4e7339ef3dd07b617557b6e275c9b882ae..d4859fdbb9444bf88ba24c9bb227f684d37cabb1 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -564,6 +564,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
  {
         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
         int level;
+       wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
  
         /* this io's submitter should not have unlocked this before we could */
         BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -573,6 +574,15 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
                 ocfs2_iocb_clear_sem_locked(iocb);
         }
  
+       if (ocfs2_iocb_is_unaligned_aio(iocb)) {
+               ocfs2_iocb_clear_unaligned_aio(iocb);
+
+               if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) &&
+                   waitqueue_active(wq)) {
+                       wake_up_all(wq);
+               }
+       }
+
         ocfs2_iocb_clear_rw_locked(iocb);
  
         level = ocfs2_iocb_rw_locked_level(iocb);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h

index 75cf3ad987a66d911c15234a803243185ccc5a94..ffb2da370a99d05dd4b919fc64a5483dbc2df7a3 100644 (file)
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -78,6 +78,7 @@ enum ocfs2_iocb_lock_bits {
         OCFS2_IOCB_RW_LOCK = 0,
         OCFS2_IOCB_RW_LOCK_LEVEL,
         OCFS2_IOCB_SEM,
+       OCFS2_IOCB_UNALIGNED_IO,
         OCFS2_IOCB_NUM_LOCKS
  };
  
@@ -91,4 +92,17 @@ enum ocfs2_iocb_lock_bits {
         clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
  #define ocfs2_iocb_is_sem_locked(iocb) \
         test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+
+#define ocfs2_iocb_set_unaligned_aio(iocb) \
+       set_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_unaligned_aio(iocb) \
+       clear_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_unaligned_aio(iocb) \
+       test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
+
+#define OCFS2_IOEND_WQ_HASH_SZ 37      /* number of hashed wait queues */
+extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
+#define ocfs2_ioend_wq(v) \
+       (&ocfs2__ioend_wq[((unsigned long)(v)) % OCFS2_IOEND_WQ_HASH_SZ])
+
  #endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index 224e38f33c914181bbff24e77dbf7e355e61333d..fa090b0a1e82ecbafd2c30cea8dbb54369e3e1b6 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2038,6 +2038,23 @@ out:
         return ret;
  }
  
+static void ocfs2_aiodio_wait(struct inode *inode)
+{
+       /* Block until this inode's ip_unaligned_aio count reads zero. */
+       wait_event(*ocfs2_ioend_wq(inode),
+                  atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0);
+}
+
+static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
+{
+       /*
+        * 1 if pos or pos + count is not a multiple of s_blocksize, else 0.
+        */
+       int mask = inode->i_sb->s_blocksize - 1;
+
+       return ((pos | (pos + count)) & mask) != 0;
+}
+
  static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
                                             struct file *file,
                                             loff_t pos, size_t count,
@@ -2214,6 +2231,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
         int full_coherency = !(osb->s_mount_opt &
                                OCFS2_MOUNT_COHERENCY_BUFFERED);
+       int unaligned_dio = 0;
  
         trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry,
                 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2282,6 +2300,10 @@ relock:
                 goto out;
         }
  
+       if (direct_io && !is_sync_kiocb(iocb))
+               unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_left,
+                                                     *ppos);
+
         /*
          * We can't complete the direct I/O as requested, fall back to
          * buffered I/O.
@@ -2297,6 +2319,18 @@ relock:
                 goto relock;
         }
  
+       if (unaligned_dio) {
+               /*
+                * Wait on previous unaligned aio to complete before
+                * proceeding.
+                */
+               ocfs2_aiodio_wait(inode);
+
+               /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */
+               atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio);
+               ocfs2_iocb_set_unaligned_aio(iocb);
+       }
+
         /*
          * To later detect whether a journal commit for sync writes is
          * necessary, we sample i_size, and cluster count here.
@@ -2365,8 +2399,12 @@ out_dio:
         if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
                 rw_level = -1;
                 have_alloc_sem = 0;
+               unaligned_dio = 0;
         }
  
+       if (unaligned_dio)
+               atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio);
+
  out:
         if (rw_level != -1)
                 ocfs2_rw_unlock(inode, rw_level);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h

index 1c508b149b3ac1bd4325fd33a9aae6bdb70e024a..88924a3133fae7c15ca3f5a5259b64eecd97022e 100644 (file)
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -43,6 +43,9 @@ struct ocfs2_inode_info
         /* protects extended attribute changes on this inode */
         struct rw_semaphore             ip_xattr_sem;
  
+       /* Number of outstanding AIOs which are not block aligned */
+       atomic_t                        ip_unaligned_aio;
+
         /* These fields are protected by ip_lock */
         spinlock_t                      ip_lock;
         u32                             ip_open_count;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c

index 51a831f9047119295b000fa7236418c83cafe867..d436c1f49684d55d68b56807152975357622ea33 100644 (file)
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -54,6 +54,7 @@
  #include "ocfs1_fs_compat.h"
  
  #include "alloc.h"
+#include "aops.h"
  #include "blockcheck.h"
  #include "dlmglue.h"
  #include "export.h"
@@ -1634,12 +1635,17 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
         return 0;
  }
  
+wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
+
  static int __init ocfs2_init(void)
  {
-       int status;
+       int status, i;
  
         ocfs2_print_version();
  
+       for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
+               init_waitqueue_head(&ocfs2__ioend_wq[i]);
+
         status = init_ocfs2_uptodate_cache();
         if (status < 0) {
                 mlog_errno(status);
@@ -1781,7 +1787,7 @@ static void ocfs2_inode_init_once(void *data)
         ocfs2_extent_map_init(&oi->vfs_inode);
         INIT_LIST_HEAD(&oi->ip_io_markers);
         oi->ip_dir_start_lookup = 0;
-
+       atomic_set(&oi->ip_unaligned_aio, 0);
         init_rwsem(&oi->ip_alloc_sem);
         init_rwsem(&oi->ip_xattr_sem);
         mutex_init(&oi->ip_io_mutex);
author	Mark Fasheh <mfasheh@suse.com>
	Mon, 15 Aug 2011 17:08:44 +0000 (10:08 -0700)
committer	Maxim Uvarov <maxim.uvarov@oracle.com>
	Thu, 22 Mar 2012 00:17:23 +0000 (17:17 -0700)
fs/ocfs2/aops.c		patch \| blob \| history
fs/ocfs2/aops.h		patch \| blob \| history
fs/ocfs2/file.c		patch \| blob \| history
fs/ocfs2/inode.h		patch \| blob \| history
fs/ocfs2/super.c		patch \| blob \| history