#include "xfs_rtalloc.h"
 
 /* Convert an xfs_fsmap to an fsmap. */
-void
+static void
 xfs_fsmap_from_internal(
        struct fsmap            *dest,
        struct xfs_fsmap        *src)
 /* getfsmap query state */
 struct xfs_getfsmap_info {
        struct xfs_fsmap_head   *head;
-       xfs_fsmap_format_t      formatter;      /* formatting fn */
-       void                    *format_arg;    /* format buffer */
+       struct fsmap            *fsmap_recs;    /* mapping records */
        struct xfs_buf          *agf_bp;        /* AGF, for refcount queries */
        xfs_daddr_t             next_daddr;     /* next daddr we expect */
        u64                     missing_owner;  /* owner of holes */
        return 0;
 }
 
+static inline void    /* store one mapping in the next free output slot */
+xfs_getfsmap_format(
+       struct xfs_mount                *mp,    /* only used by the tracepoint */
+       struct xfs_fsmap                *xfm,   /* internal-format mapping to emit */
+       struct xfs_getfsmap_info        *info)  /* supplies fsmap_recs and head */
+{
+       struct fsmap                    *rec;
+
+       trace_xfs_getfsmap_mapping(mp, xfm);
+       /*
+        * Take the slot at the current entry count and advance the count in
+        * the same expression.  No bounds check is done here.
+        * NOTE(review): callers presumably verify fmh_entries < fmh_count
+        * before calling -- confirm.
+        */
+       rec = &info->fsmap_recs[info->head->fmh_entries++];
+       xfs_fsmap_from_internal(rec, xfm);
+}
+
 /*
  * Format a reverse mapping for getfsmap, having translated rm_startblock
  * into the appropriate daddr units.
                fmr.fmr_offset = 0;
                fmr.fmr_length = rec_daddr - info->next_daddr;
                fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
-               error = info->formatter(&fmr, info->format_arg);
-               if (error)
-                       return error;
-               info->head->fmh_entries++;
+               xfs_getfsmap_format(mp, &fmr, info);
        }
 
        if (info->last)
                if (shared)
                        fmr.fmr_flags |= FMR_OF_SHARED;
        }
-       error = info->formatter(&fmr, info->format_arg);
-       if (error)
-               return error;
-       info->head->fmh_entries++;
 
+       xfs_getfsmap_format(mp, &fmr, info);
 out:
        rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
        if (info->next_daddr < rec_daddr)
 #endif /* CONFIG_XFS_RT */
 
 /*
- * Get filesystem's extents as described in head, and format for
- * output.  Calls formatter to fill the user's buffer until all
- * extents are mapped, until the passed-in head->fmh_count slots have
- * been filled, or until the formatter short-circuits the loop, if it
- * is tracking filled-in extents on its own.
+ * Get filesystem's extents as described in head, and format for output. Fills
+ * in the supplied records array until there are no more reverse mappings to
+ * return or head.fmh_entries == head.fmh_count.  In the second case, this
+ * function returns -ECANCELED to indicate that more records would have been
+ * returned.
  *
  * Key to Confusion
  * ----------------
 xfs_getfsmap(
        struct xfs_mount                *mp,
        struct xfs_fsmap_head           *head,
-       xfs_fsmap_format_t              formatter,
-       void                            *arg)
+       struct fsmap                    *fsmap_recs)
 {
        struct xfs_trans                *tp = NULL;
        struct xfs_fsmap                dkeys[2];       /* per-dev keys */
 
        info.next_daddr = head->fmh_keys[0].fmr_physical +
                          head->fmh_keys[0].fmr_length;
-       info.formatter = formatter;
-       info.format_arg = arg;
+       info.fsmap_recs = fsmap_recs;
        info.head = head;
 
        /*
 
        return error;
 }
 
-struct getfsmap_info {
-       struct xfs_mount        *mp;
-       struct fsmap_head __user *data;
-       unsigned int            idx;
-       __u32                   last_flags;
-};
-
-STATIC int
-xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
-{
-       struct getfsmap_info    *info = priv;
-       struct fsmap            fm;
-
-       trace_xfs_getfsmap_mapping(info->mp, xfm);
-
-       info->last_flags = xfm->fmr_flags;
-       xfs_fsmap_from_internal(&fm, xfm);
-       if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
-                       sizeof(struct fsmap)))
-               return -EFAULT;
-
-       return 0;
-}
-
 STATIC int
 xfs_ioc_getfsmap(
        struct xfs_inode        *ip,
        struct fsmap_head       __user *arg)
 {
-       struct getfsmap_info    info = { NULL };
        struct xfs_fsmap_head   xhead = {0};
        struct fsmap_head       head;
-       bool                    aborted = false;
+       struct fsmap            *recs;          /* kernel-side staging buffer */
+       unsigned int            count;          /* capacity of recs, in records */
+       __u32                   last_flags = 0; /* fmr_flags of last record copied out */
+       bool                    done = false;   /* xfs_getfsmap() returned 0 */
        int                     error;
 
        if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
                       sizeof(head.fmh_keys[1].fmr_reserved)))
                return -EINVAL;
 
+       /*
+        * Use an internal memory buffer so that we don't have to copy fsmap
+        * data to userspace while holding locks.  Start by trying to allocate
+        * up to 128k for the buffer, but fall back to a single page if needed.
+        * Either way the buffer is capped at head.fmh_count records, and the
+        * query below is run in batches of at most count records, each copied
+        * out to userspace before the next batch is fetched.
+        */
+       count = min_t(unsigned int, head.fmh_count,
+                       131072 / sizeof(struct fsmap));
+       recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+       if (!recs) {
+               count = min_t(unsigned int, head.fmh_count,
+                               PAGE_SIZE / sizeof(struct fsmap));
+               recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+               if (!recs)
+                       return -ENOMEM;
+       }
+
        xhead.fmh_iflags = head.fmh_iflags;
-       xhead.fmh_count = head.fmh_count;
        xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
        xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
 
        trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
        trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
 
-       info.mp = ip->i_mount;
-       info.data = arg;
-       error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
-       if (error == -ECANCELED) {
-               error = 0;
-               aborted = true;
-       } else if (error)
-               return error;
+       head.fmh_entries = 0;   /* running total across all batches */
+       do {
+               struct fsmap __user     *user_recs;
+               struct fsmap            *last_rec;
+               /*
+                * Each pass refills recs from slot 0 (the per-batch entry
+                * count is reset) and targets the user array slot that
+                * follows the records already returned.  The batch size is
+                * bounded by both the staging buffer and the room the
+                * caller has left.
+                */
+               user_recs = &arg->fmh_recs[head.fmh_entries];
+               xhead.fmh_entries = 0;
+               xhead.fmh_count = min_t(unsigned int, count,
+                                       head.fmh_count - head.fmh_entries);
+
+               /* Run query, record how many entries we got. */
+               error = xfs_getfsmap(ip->i_mount, &xhead, recs);
+               switch (error) {
+               case 0:
+                       /*
+                        * There are no more records in the result set.  Copy
+                        * whatever we got to userspace and break out.
+                        */
+                       done = true;
+                       break;
+               case -ECANCELED:
+                       /*
+                        * The internal memory buffer is full.  Copy whatever
+                        * records we got to userspace and go again if we have
+                        * not yet filled the userspace buffer.
+                        */
+                       error = 0;
+                       break;
+               default:
+                       goto out_free;
+               }
+               head.fmh_entries += xhead.fmh_entries;
+               head.fmh_oflags = xhead.fmh_oflags;
 
-       /* If we didn't abort, set the "last" flag in the last fmx */
-       if (!aborted && info.idx) {
-               info.last_flags |= FMR_OF_LAST;
-               if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
-                               &info.last_flags, sizeof(info.last_flags)))
-                       return -EFAULT;
+               /*
+                * If the caller wanted a record count or there aren't any
+                * new records to return, we're done.
+                */
+               if (head.fmh_count == 0 || xhead.fmh_entries == 0)
+                       break;
+
+               /* Copy all the records we got out to userspace. */
+               if (copy_to_user(user_recs, recs,
+                                xhead.fmh_entries * sizeof(struct fsmap))) {
+                       error = -EFAULT;
+                       goto out_free;
+               }
+
+               /* Remember the last record flags we copied to userspace. */
+               last_rec = &recs[xhead.fmh_entries - 1];
+               last_flags = last_rec->fmr_flags;
+
+               /*
+                * Set up the low key for the next iteration from the last
+                * record we returned.
+                * NOTE(review): xfs_getfsmap() derives its starting daddr
+                * from fmh_keys[0].fmr_physical + fmr_length, so this
+                * presumably resumes just past that record -- confirm.
+                */
+               xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
+               trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
+       } while (!done && head.fmh_entries < head.fmh_count);
+
+       /*
+        * If there are no more records in the query result set and we're not
+        * in counting mode, mark the last record returned with the LAST flag.
+        */
+       if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
+               struct fsmap __user     *user_rec;
+
+               last_flags |= FMR_OF_LAST;
+               user_rec = &arg->fmh_recs[head.fmh_entries - 1];
+
+               if (copy_to_user(&user_rec->fmr_flags, &last_flags,
+                                       sizeof(last_flags))) {
+                       error = -EFAULT;
+                       goto out_free;
+               }
        }
 
        /* copy back header */
-       head.fmh_entries = xhead.fmh_entries;
-       head.fmh_oflags = xhead.fmh_oflags;
-       if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
-               return -EFAULT;
+       if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
+               error = -EFAULT;
+               goto out_free;
+       }
 
-       return 0;
+out_free:
+       kmem_free(recs);        /* reached on success as well as on error */
+       return error;
 }
 
 STATIC int