* done item to release the intent item; and then log a new intent item.
  * The caller should provide a fresh transaction and roll it after we're done.
  */
-static int
+static void
 xfs_defer_relog(
        struct xfs_trans                **tpp,
        struct list_head                *dfops)
 
                xfs_defer_relog_intent(*tpp, dfp);
        }
-
-       if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
-               return xfs_defer_trans_roll(tpp);
-       return 0;
 }
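With the roll hoisted out, xfs_defer_relog() only dirties the transaction and
leaves rolling to the caller. The call site in xfs_defer_finish_noroll() below
shows the new contract; a single roll now covers both relog passes:

	xfs_defer_relog(tp, &dop_pending);
	xfs_defer_relog(tp, &dop_paused);

	if ((*tp)->t_flags & XFS_TRANS_DIRTY)
		error = xfs_defer_trans_roll(tp);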
 
 /*
        return error;
 }
 
+/* Move all paused deferred work from @tp to @paused_list. */
+static void
+xfs_defer_isolate_paused(
+       struct xfs_trans                *tp,
+       struct list_head                *paused_list)
+{
+       struct xfs_defer_pending        *dfp;
+       struct xfs_defer_pending        *pli;
+
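+       /* Use the _safe iterator: list_move_tail() unlinks dfp as we walk. */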
+       list_for_each_entry_safe(dfp, pli, &tp->t_dfops, dfp_list) {
+               if (!(dfp->dfp_flags & XFS_DEFER_PAUSED))
+                       continue;
+
+               list_move_tail(&dfp->dfp_list, paused_list);
+               trace_xfs_defer_isolate_paused(tp->t_mountp, dfp);
+       }
+}
+
 /*
  * Finish all the pending work.  This involves logging intent items for
  * any work items that wandered in since the last transaction roll (if
        struct xfs_defer_pending        *dfp = NULL;
        int                             error = 0;
        LIST_HEAD(dop_pending);
+       LIST_HEAD(dop_paused);
 
        ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
 
                 */
                int has_intents = xfs_defer_create_intents(*tp);
 
+               xfs_defer_isolate_paused(*tp, &dop_paused);
+
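+               /* Paused items wait on dop_paused; the rest moves to dop_pending to be finished. */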
                list_splice_init(&(*tp)->t_dfops, &dop_pending);
 
                if (has_intents < 0) {
                                goto out_shutdown;
 
                        /* Relog intent items to keep the log moving. */
-                       error = xfs_defer_relog(tp, &dop_pending);
-                       if (error)
-                               goto out_shutdown;
+                       xfs_defer_relog(tp, &dop_pending);
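+                       /* Paused intents pin the log tail too, so relog them as well. */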
+                       xfs_defer_relog(tp, &dop_paused);
+
+                       if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
+                               error = xfs_defer_trans_roll(tp);
+                               if (error)
+                                       goto out_shutdown;
+                       }
                }
 
-               dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
-                                      dfp_list);
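+               /* dop_pending may be empty if all remaining work is paused. */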
+               dfp = list_first_entry_or_null(&dop_pending,
+                               struct xfs_defer_pending, dfp_list);
+               if (!dfp)
+                       break;
                error = xfs_defer_finish_one(*tp, dfp);
                if (error && error != -EAGAIN)
                        goto out_shutdown;
        }
 
+       /* Requeue the paused items in the outgoing transaction. */
+       list_splice_tail_init(&dop_paused, &(*tp)->t_dfops);
+
        trace_xfs_defer_finish_done(*tp, _RET_IP_);
        return 0;
 
 out_shutdown:
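+       /* Fold paused items back in so their intents are aborted too. */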
+       list_splice_tail_init(&dop_paused, &dop_pending);
        xfs_defer_trans_abort(*tp, &dop_pending);
        xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
        trace_xfs_defer_finish_error(*tp, error);
 xfs_defer_finish(
        struct xfs_trans        **tp)
 {
+#ifdef DEBUG
+       struct xfs_defer_pending *dfp;
+#endif
        int                     error;
 
        /*
        }
 
        /* Reset LOWMODE now that we've finished all the dfops. */
-       ASSERT(list_empty(&(*tp)->t_dfops));
+#ifdef DEBUG
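+       /* Only paused work items may remain attached to the transaction. */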
+       list_for_each_entry(dfp, &(*tp)->t_dfops, dfp_list)
+               ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED);
+#endif
        (*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
        return 0;
 }
        struct xfs_mount        *mp = tp->t_mountp;
 
        trace_xfs_defer_cancel(tp, _RET_IP_);
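+       /* Abort intent items that were logged but never finished, e.g. paused work. */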
+       xfs_defer_trans_abort(tp, &tp->t_dfops);
        xfs_defer_cancel_list(mp, &tp->t_dfops);
 }
 
        if (dfp->dfp_intent)
                return false;
 
+       /* Paused items cannot absorb more work */
+       if (dfp->dfp_flags & XFS_DEFER_PAUSED)
+               return false;
+
        /* Already full? */
        if (ops->max_items && dfp->dfp_count >= ops->max_items)
                return false;
 }
 
 /* Add an item for later deferred processing. */
-void
+struct xfs_defer_pending *
 xfs_defer_add(
        struct xfs_trans                *tp,
        enum xfs_defer_ops_type         type,
 
        xfs_defer_add_item(dfp, li);
        trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
+       return dfp;
 }
 
 /*
        xfs_rmap_intent_destroy_cache();
        xfs_defer_destroy_cache();
 }
+
+/*
+ * Mark a deferred work item so that it will be requeued indefinitely without
+ * being finished.  Caller must ensure there are no data dependencies on this
+ * work item in the meantime.
+ */
+void
+xfs_defer_item_pause(
+       struct xfs_trans                *tp,
+       struct xfs_defer_pending        *dfp)
+{
+       ASSERT(!(dfp->dfp_flags & XFS_DEFER_PAUSED));
+
+       dfp->dfp_flags |= XFS_DEFER_PAUSED;
+
+       trace_xfs_defer_item_pause(tp->t_mountp, dfp);
+}
+
+/*
+ * Release a paused deferred work item so that it will be finished during the
+ * next transaction roll.
+ */
+void
+xfs_defer_item_unpause(
+       struct xfs_trans                *tp,
+       struct xfs_defer_pending        *dfp)
+{
+       ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED);
+
+       dfp->dfp_flags &= ~XFS_DEFER_PAUSED;
+
+       trace_xfs_defer_item_unpause(tp->t_mountp, dfp);
+}
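A sketch of how a caller might drive the new interfaces; the opcode and the
xefi work item below are illustrative placeholders, not taken from this patch:

	struct xfs_defer_pending	*dfp;

	/* Queue the work; its intent gets logged, but it is never finished. */
	dfp = xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
	xfs_defer_item_pause(tp, dfp);

	/* The paused item rides along, relogged but unfinished. */
	error = xfs_defer_finish(&tp);
	if (error)
		return error;

	/* Dependencies are resolved; the next finish completes the work. */
	xfs_defer_item_unpause(tp, dfp);

Because xfs_defer_finish() splices paused items back onto the outgoing
transaction, dfp remains valid across the call.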
 
        struct xfs_log_item             *dfp_intent;    /* log intent item */
        struct xfs_log_item             *dfp_done;      /* log done item */
        unsigned int                    dfp_count;      /* # extent items */
+       unsigned int                    dfp_flags;      /* XFS_DEFER_* flags */
        enum xfs_defer_ops_type         dfp_type;
 };
 
-void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type,
-               struct list_head *h);
+/*
+ * Create a log intent item for this deferred item, but don't actually finish
+ * the work.  Caller must clear this before the final transaction commit.
+ */
+#define XFS_DEFER_PAUSED       (1U << 0)
+
+#define XFS_DEFER_PENDING_STRINGS \
+       { XFS_DEFER_PAUSED,     "paused" }
+
+void xfs_defer_item_pause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
+void xfs_defer_item_unpause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
+
+struct xfs_defer_pending *xfs_defer_add(struct xfs_trans *tp,
+               enum xfs_defer_ops_type type, struct list_head *h);
 int xfs_defer_finish_noroll(struct xfs_trans **tp);
 int xfs_defer_finish(struct xfs_trans **tp);
 int xfs_defer_finish_one(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
 
                __field(dev_t, dev)
                __field(int, type)
                __field(void *, intent)
+               __field(unsigned int, flags)
                __field(char, committed)
                __field(int, nr)
        ),
                __entry->dev = mp ? mp->m_super->s_dev : 0;
                __entry->type = dfp->dfp_type;
                __entry->intent = dfp->dfp_intent;
+               __entry->flags = dfp->dfp_flags;
                __entry->committed = dfp->dfp_done != NULL;
                __entry->nr = dfp->dfp_count;
        ),
-       TP_printk("dev %d:%d optype %d intent %p committed %d nr %d",
+       TP_printk("dev %d:%d optype %d intent %p flags %s committed %d nr %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->type,
                  __entry->intent,
+                 __print_flags(__entry->flags, "|", XFS_DEFER_PENDING_STRINGS),
                  __entry->committed,
                  __entry->nr)
 )
 DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
 DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
 DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_isolate_paused);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_pause);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_unpause);
 
 #define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
 DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
                __field(void *, intent)
                __field(void *, item)
                __field(char, committed)
+               __field(unsigned int, flags)
                __field(int, nr)
        ),
        TP_fast_assign(
                __entry->intent = dfp->dfp_intent;
                __entry->item = item;
                __entry->committed = dfp->dfp_done != NULL;
+               __entry->flags = dfp->dfp_flags;
                __entry->nr = dfp->dfp_count;
        ),
-       TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d",
+       TP_printk("dev %d:%d optype %d intent %p item %p flags %s committed %d nr %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->type,
                  __entry->intent,
                  __entry->item,
+                 __print_flags(__entry->flags, "|", XFS_DEFER_PENDING_STRINGS),
                  __entry->committed,
                  __entry->nr)
 )