 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_dir2.h"
        xfs_log_force(mp, XFS_LOG_SYNC);
 
        /*
-        * Do a delwri reclaim pass first so that as many dirty inodes are
-        * queued up for IO as possible. Then flush the buffers before making
-        * a synchronous path to catch all the remaining inodes are reclaimed.
-        * This makes the reclaim process as quick as possible by avoiding
-        * synchronous writeout and blocking on inodes already in the delwri
-        * state as much as possible.
+        * Flush all pending changes from the AIL.
+        */
+       xfs_ail_push_all_sync(mp->m_ail);
+
+       /*
+        * And reclaim all inodes.  At this point there should be no dirty
+        * inodes, and none should be pinned or locked, but use synchronous
+        * reclaim just to be sure.
         */
-       xfs_reclaim_inodes(mp, 0);
-       xfs_flush_buftarg(mp->m_ddev_targp, 1);
        xfs_reclaim_inodes(mp, SYNC_WAIT);
 
        xfs_qm_unmount(mp);
        if (error)
                xfs_warn(mp, "Unable to update superblock counters. "
                                "Freespace may not be correct on next mount.");
-       xfs_unmountfs_writesb(mp);
 
        /*
-        * Make sure all buffers have been flushed and completed before
-        * unmounting the log.
+        * At this point we might have modified the superblock again and thus
+        * added an item to the AIL, so flush it again.
         */
-       error = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-       if (error)
-               xfs_warn(mp, "%d busy buffers during unmount.", error);
+       xfs_ail_push_all_sync(mp->m_ail);
        xfs_wait_buftarg(mp->m_ddev_targp);
 
        xfs_log_unmount_write(mp);
        return error;
 }
 
-int
-xfs_unmountfs_writesb(xfs_mount_t *mp)
-{
-       xfs_buf_t       *sbp;
-       int             error = 0;
-
-       /*
-        * skip superblock write if fs is read-only, or
-        * if we are doing a forced umount.
-        */
-       if (!((mp->m_flags & XFS_MOUNT_RDONLY) ||
-               XFS_FORCED_SHUTDOWN(mp))) {
-
-               sbp = xfs_getsb(mp, 0);
-
-               XFS_BUF_UNDONE(sbp);
-               XFS_BUF_UNREAD(sbp);
-               xfs_buf_delwri_dequeue(sbp);
-               XFS_BUF_WRITE(sbp);
-               XFS_BUF_UNASYNC(sbp);
-               ASSERT(sbp->b_target == mp->m_ddev_targp);
-               xfsbdstrat(mp, sbp);
-               error = xfs_buf_iowait(sbp);
-               if (error)
-                       xfs_buf_ioerror_alert(sbp, __func__);
-               xfs_buf_relse(sbp);
-       }
-       return error;
-}
-
 /*
  * xfs_mod_sb() can be used to copy arbitrary changes to the
  * in-core superblock into the superblock buffer to be logged.
 
 extern int     xfs_mountfs(xfs_mount_t *mp);
 
 extern void    xfs_unmountfs(xfs_mount_t *);
-extern int     xfs_unmountfs_writesb(xfs_mount_t *);
 extern int     xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
 extern int     xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
                        uint, int);
 
        return error;
 }
 
-STATIC int
-xfs_sync_inode_attr(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       int                     error = 0;
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       if (xfs_inode_clean(ip))
-               goto out_unlock;
-       if (!xfs_iflock_nowait(ip)) {
-               if (!(flags & SYNC_WAIT))
-                       goto out_unlock;
-               xfs_iflock(ip);
-       }
-
-       if (xfs_inode_clean(ip)) {
-               xfs_ifunlock(ip);
-               goto out_unlock;
-       }
-
-       error = xfs_iflush(ip, flags);
-
-       /*
-        * We don't want to try again on non-blocking flushes that can't run
-        * again immediately. If an inode really must be written, then that's
-        * what the SYNC_WAIT flag is for.
-        */
-       if (error == EAGAIN) {
-               ASSERT(!(flags & SYNC_WAIT));
-               error = 0;
-       }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       return error;
-}
-
 /*
  * Write out pagecache data for the whole filesystem.
  */
        return 0;
 }
 
-/*
- * Write out inode metadata (attributes) for the whole filesystem.
- */
-STATIC int
-xfs_sync_attr(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       ASSERT((flags & ~SYNC_WAIT) == 0);
-
-       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
-}
-
 STATIC int
 xfs_sync_fsdata(
        struct xfs_mount        *mp)
  * First stage of freeze - no writers will make progress now we are here,
  * so we flush delwri and delalloc buffers here, then wait for all I/O to
  * complete.  Data is frozen at that point. Metadata is not frozen,
- * transactions can still occur here so don't bother flushing the buftarg
+ * transactions can still occur here so don't bother emptying the AIL
  * because it'll just get dirty again.
  */
 int
        return error ? error : error2;
 }
 
-STATIC void
-xfs_quiesce_fs(
-       struct xfs_mount        *mp)
-{
-       int     count = 0, pincount;
-
-       xfs_reclaim_inodes(mp, 0);
-       xfs_flush_buftarg(mp->m_ddev_targp, 0);
-
-       /*
-        * This loop must run at least twice.  The first instance of the loop
-        * will flush most meta data but that will generate more meta data
-        * (typically directory updates).  Which then must be flushed and
-        * logged before we can write the unmount record. We also so sync
-        * reclaim of inodes to catch any that the above delwri flush skipped.
-        */
-       do {
-               xfs_reclaim_inodes(mp, SYNC_WAIT);
-               xfs_sync_attr(mp, SYNC_WAIT);
-               pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-               if (!pincount) {
-                       delay(50);
-                       count++;
-               }
-       } while (count < 2);
-}
-
 /*
  * Second stage of a quiesce. The data is already synced, now we have to take
  * care of the metadata. New transactions are already blocked, so we need to
        while (atomic_read(&mp->m_active_trans) > 0)
                delay(100);
 
-       /* flush inodes and push all remaining buffers out to disk */
-       xfs_quiesce_fs(mp);
+       /* reclaim inodes to do any IO before the freeze completes */
+       xfs_reclaim_inodes(mp, 0);
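+       /* now reclaim synchronously to wait for the IO started above */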
+       xfs_reclaim_inodes(mp, SYNC_WAIT);
+
+       /* flush all pending changes from the AIL */
+       xfs_ail_push_all_sync(mp->m_ail);
 
        /*
         * Just warn here till VFS can correctly support
                xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
                                "Frozen image may not be consistent.");
        xfs_log_unmount_write(mp);
-       xfs_unmountfs_writesb(mp);
+
+       /*
+        * At this point we might have modified the superblock again and thus
+        * added an item to the AIL, so flush it again.
+        */
+       xfs_ail_push_all_sync(mp->m_ail);
 }
 
 static void
 
                spin_lock(&ailp->xa_lock);
        }
 
-       target = ailp->xa_target;
        lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
-       if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
+       if (!lip) {
                /*
                 * AIL is empty or our push has reached the end.
                 */
         * lots of contention on the AIL lists.
         */
        lsn = lip->li_lsn;
+       target = ailp->xa_target;
        while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
                int     lock_result;
                /*
                }
 
                spin_lock(&ailp->xa_lock);
-               /* should we bother continuing? */
-               if (XFS_FORCED_SHUTDOWN(mp))
-                       break;
-               ASSERT(mp->m_log);
-
                count++;
 
                /*
                xfs_ail_push(ailp, threshold_lsn);
 }
 
+/*
+ * Push out all items in the AIL immediately and wait until the AIL is empty.
+ */
+void
+xfs_ail_push_all_sync(
+       struct xfs_ail  *ailp)
+{
+       struct xfs_log_item     *lip;
+       DEFINE_WAIT(wait);
+
+       spin_lock(&ailp->xa_lock);
+       while ((lip = xfs_ail_max(ailp)) != NULL) {
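+               /*
+                * Set the target to the LSN of the highest item in the AIL
+                * and wake xfsaild to push everything up to it, then sleep
+                * until the xa_empty wakeup that is issued once the AIL
+                * drains.  New items may have been inserted while we slept,
+                * hence the loop.
+                */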
+               prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE);
+               ailp->xa_target = lip->li_lsn;
+               wake_up_process(ailp->xa_task);
+               spin_unlock(&ailp->xa_lock);
+               schedule();
+               spin_lock(&ailp->xa_lock);
+       }
+       spin_unlock(&ailp->xa_lock);
+
+       finish_wait(&ailp->xa_empty, &wait);
+}
+
 /*
  * xfs_trans_ail_update - bulk AIL insertion operation.
  *
        if (mlip_changed) {
                if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
                        xlog_assign_tail_lsn_locked(ailp->xa_mount);
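+               /* wake any waiters in xfs_ail_push_all_sync() */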
+               if (list_empty(&ailp->xa_ail))
+                       wake_up_all(&ailp->xa_empty);
                spin_unlock(&ailp->xa_lock);
 
                xfs_log_space_wake(ailp->xa_mount);
        INIT_LIST_HEAD(&ailp->xa_ail);
        INIT_LIST_HEAD(&ailp->xa_cursors);
        spin_lock_init(&ailp->xa_lock);
+       init_waitqueue_head(&ailp->xa_empty);
 
        ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
                        ailp->xa_mount->m_fsname);
 
        spinlock_t              xa_lock;
        xfs_lsn_t               xa_last_pushed_lsn;
        int                     xa_log_flush;
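+       /* woken when the AIL is emptied, see xfs_ail_push_all_sync() */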
+       wait_queue_head_t       xa_empty;
 };
 
 /*
 
 void                   xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
 void                   xfs_ail_push_all(struct xfs_ail *);
+void                   xfs_ail_push_all_sync(struct xfs_ail *);
 struct xfs_log_item    *xfs_ail_min(struct xfs_ail  *ailp);
 xfs_lsn_t              xfs_ail_min_lsn(struct xfs_ail *ailp);