If unsure, say N.
 
+config NFSD_PNFS
+       bool "NFSv4.1 server support for Parallel NFS (pNFS)"
+       depends on NFSD_V4
+       help
+         This option enables support for the parallel NFS (pNFS) features
+         of minor version 1 of the NFSv4 protocol (RFC 5661) in the
+         kernel's NFS server.
+
+         If unsure, say N.
+
 config NFSD_V4_SECURITY_LABEL
        bool "Provide Security Label support for NFSv4 server"
        depends on NFSD_V4 && SECURITY
 
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
                           nfs4acl.o nfs4callback.o nfs4recover.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
 
 #include "nfsd.h"
 #include "nfsfh.h"
 #include "netns.h"
+#include "pnfs.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_EXPORT
 
 
        exp.ex_client = dom;
        exp.cd = cd;
+       exp.ex_devid_map = NULL;
 
        /* expiry */
        err = -EINVAL;
                if (!gid_valid(exp.ex_anon_gid))
                        goto out4;
                err = 0;
+
+               nfsd4_setup_layout_type(&exp);
        }
 
        expp = svc_export_lookup(&exp);
        new->ex_fslocs.locations = NULL;
        new->ex_fslocs.locations_count = 0;
        new->ex_fslocs.migrated = 0;
+       new->ex_layout_type = 0;
        new->ex_uuid = NULL;
        new->cd = item->cd;
 }
        new->ex_anon_uid = item->ex_anon_uid;
        new->ex_anon_gid = item->ex_anon_gid;
        new->ex_fsid = item->ex_fsid;
+       new->ex_devid_map = item->ex_devid_map;
+       item->ex_devid_map = NULL;
        new->ex_uuid = item->ex_uuid;
        item->ex_uuid = NULL;
        new->ex_fslocs.locations = item->ex_fslocs.locations;
        item->ex_fslocs.locations_count = 0;
        new->ex_fslocs.migrated = item->ex_fslocs.migrated;
        item->ex_fslocs.migrated = 0;
+       new->ex_layout_type = item->ex_layout_type;
        new->ex_nflavors = item->ex_nflavors;
        for (i = 0; i < MAX_SECINFO_LIST; i++) {
                new->ex_flavors[i] = item->ex_flavors[i];
 
        struct nfsd4_fs_locations ex_fslocs;
        uint32_t                ex_nflavors;
        struct exp_flavor_info  ex_flavors[MAX_SECINFO_LIST];
+       enum pnfs_layouttype    ex_layout_type;
+       struct nfsd4_deviceid_map *ex_devid_map;
        struct cache_detail     *cd;
 };
 
 
--- /dev/null
+/*
+ * Copyright (c) 2014 Christoph Hellwig.
+ */
+#include <linux/jhash.h>
+#include <linux/sched.h>
+
+#include "pnfs.h"
+#include "netns.h"
+
+#define NFSDDBG_FACILITY                NFSDDBG_PNFS
+
+struct nfs4_layout {   /* one layout segment tracked under a layout stateid */
+       struct list_head                lo_perstate;   /* link on the owning stateid's ls_layouts list */
+       struct nfs4_layout_stateid      *lo_state;     /* layout stateid this segment belongs to */
+       struct nfsd4_layout_seg         lo_seg;        /* iomode, offset and length of the segment */
+};
+
+static struct kmem_cache *nfs4_layout_cache;   /* slab for struct nfs4_layout */
+static struct kmem_cache *nfs4_layout_stateid_cache;   /* slab for struct nfs4_layout_stateid */
+
+const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] =  {  /* layout driver ops, indexed by layout type; no entries are registered here */
+};
+
+/* pNFS device ID to export fsid mapping */
+#define DEVID_HASH_BITS        8
+#define DEVID_HASH_SIZE        (1 << DEVID_HASH_BITS)
+#define DEVID_HASH_MASK        (DEVID_HASH_SIZE - 1)
+static u64 nfsd_devid_seq = 1; /* next map index to hand out; incremented under nfsd_devid_lock */
+static struct list_head nfsd_devid_hash[DEVID_HASH_SIZE];      /* buckets selected by devid_hashfn(map->idx) */
+static DEFINE_SPINLOCK(nfsd_devid_lock);       /* held while searching for and inserting device ID maps */
+
+/*
+ * Select the nfsd_devid_hash bucket for a device ID map index.
+ *
+ * The low and high 32-bit halves of @idx are mixed with jhash_2words() and
+ * the result is masked down to a valid bucket number.
+ */
+static inline u32 devid_hashfn(u64 idx)
+{
+       u32 low_word = idx;
+       u32 high_word = idx >> 32;
+
+       return jhash_2words(low_word, high_word, 0) & DEVID_HASH_MASK;
+}
+
+static void    /*
+ * Make sure the export behind @fhp has a device ID map attached.
+ *
+ * A candidate map carrying a copy of the file handle's fsid is allocated
+ * before the lock is taken.  Under nfsd_devid_lock the export is checked
+ * again, then the hash table is searched for an existing map with the same
+ * fsid type and fsid so that it can be reused; only when none is found is
+ * the candidate given the next sequence number and added to the table.
+ *
+ * Nothing is returned: if the allocation fails the export is simply left
+ * without a map, which callers detect by re-reading ex_devid_map.
+ */
+nfsd4_alloc_devid_map(const struct svc_fh *fhp)
+{
+       const struct knfsd_fh *fh = &fhp->fh_handle;
+       size_t fsid_len = key_len(fh->fh_fsid_type);
+       struct nfsd4_deviceid_map *map, *old;
+       int i;
+
+       map = kzalloc(sizeof(*map) + fsid_len, GFP_KERNEL);    /* extra room for the fsid copied below */
+       if (!map)
+               return;
+
+       map->fsid_type = fh->fh_fsid_type;
+       memcpy(&map->fsid, fh->fh_fsid, fsid_len);
+
+       spin_lock(&nfsd_devid_lock);
+       if (fhp->fh_export->ex_devid_map)      /* export already has a map: nothing to do */
+               goto out_unlock;
+
+       for (i = 0; i < DEVID_HASH_SIZE; i++) {        /* buckets are keyed by idx, so a search by fsid has to walk all of them */
+               list_for_each_entry(old, &nfsd_devid_hash[i], hash) {
+                       if (old->fsid_type != fh->fh_fsid_type)
+                               continue;
+                       if (memcmp(old->fsid, fh->fh_fsid,
+                                       key_len(old->fsid_type)))
+                               continue;
+
+                       fhp->fh_export->ex_devid_map = old;    /* reuse the existing map for this fsid */
+                       goto out_unlock;
+               }
+       }
+
+       map->idx = nfsd_devid_seq++;
+       list_add_tail_rcu(&map->hash, &nfsd_devid_hash[devid_hashfn(map->idx)]);
+       fhp->fh_export->ex_devid_map = map;
+       map = NULL;    /* the candidate is now in the table; keep the kfree() below away from it */
+
+out_unlock:
+       spin_unlock(&nfsd_devid_lock);
+       kfree(map);    /* releases the candidate when it was not inserted */
+}
+
+/*
+ * Look up the device ID map whose index equals @idx.
+ *
+ * Only the bucket chosen by devid_hashfn(@idx) is walked, inside an RCU
+ * read-side critical section.  Returns the matching map, or NULL when no
+ * map in that bucket carries the index.
+ */
+struct nfsd4_deviceid_map *
+nfsd4_find_devid_map(int idx)
+{
+       struct list_head *bucket = &nfsd_devid_hash[devid_hashfn(idx)];
+       struct nfsd4_deviceid_map *cur, *found = NULL;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(cur, bucket, hash) {
+               if (cur->idx != idx)
+                       continue;
+               found = cur;
+       }
+       rcu_read_unlock();
+
+       return found;
+}
+
+/*
+ * Fill in a device ID for the export that @fhp belongs to.
+ *
+ * @id:                device ID to initialise
+ * @fhp:               file handle identifying the export
+ * @device_generation: generation number to store in the device ID
+ *
+ * A device ID map is allocated for the export on first use.  Returns 0 on
+ * success, or -ENOMEM when the export still has no map after the
+ * allocation attempt.
+ */
+int
+nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
+               u32 device_generation)
+{
+       if (!fhp->fh_export->ex_devid_map)
+               nfsd4_alloc_devid_map(fhp);
+       if (!fhp->fh_export->ex_devid_map)
+               return -ENOMEM;
+
+       id->pad = 0;
+       id->generation = device_generation;
+       id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
+       return 0;
+}
+
+void nfsd4_setup_layout_type(struct svc_export *exp)   /*
+ * Hook for choosing the layout type of an export.  As written it only
+ * returns early for exports flagged NFSEXP_NOPNFS and does nothing for any
+ * other export; ex_layout_type is never modified here.
+ */
+{
+       if (exp->ex_flags & NFSEXP_NOPNFS)
+               return;
+}
+
+static void    /*
+ * Release callback installed as sc_free on layout stateids.
+ *
+ * Unlinks the stateid from its client's list and from its file's list,
+ * each under the corresponding spinlock, and then returns the object to
+ * nfs4_layout_stateid_cache.
+ */
+nfsd4_free_layout_stateid(struct nfs4_stid *stid)
+{
+       struct nfs4_layout_stateid *ls = layoutstateid(stid);
+       struct nfs4_client *clp = ls->ls_stid.sc_client;
+       struct nfs4_file *fp = ls->ls_stid.sc_file;
+
+       spin_lock(&clp->cl_lock);
+       list_del_init(&ls->ls_perclnt);        /* linked onto clp->cl_lo_states at allocation time */
+       spin_unlock(&clp->cl_lock);
+
+       spin_lock(&fp->fi_lock);
+       list_del_init(&ls->ls_perfile);        /* linked onto fp->fi_lo_states at allocation time */
+       spin_unlock(&fp->fi_lock);
+
+       kmem_cache_free(nfs4_layout_stateid_cache, ls);
+}
+
+static struct nfs4_layout_stateid *    /*
+ * Allocate a layout stateid for the client in @cstate, bound to the file
+ * that @parent refers to.
+ *
+ * @cstate:      compound state supplying the client
+ * @parent:      existing stateid whose sc_file the new stateid shares
+ * @layout_type: layout type recorded in the new stateid
+ *
+ * The new stateid is linked onto the client's cl_lo_states list and the
+ * file's fi_lo_states list.  Returns the new layout stateid, or NULL when
+ * nfs4_alloc_stid() fails.
+ */
+nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
+               struct nfs4_stid *parent, u32 layout_type)
+{
+       struct nfs4_client *clp = cstate->clp;
+       struct nfs4_file *fp = parent->sc_file;
+       struct nfs4_layout_stateid *ls;
+       struct nfs4_stid *stp;
+
+       stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
+       if (!stp)
+               return NULL;
+       stp->sc_free = nfsd4_free_layout_stateid;
+       get_nfs4_file(fp);     /* the new stateid holds its own reference on the file */
+       stp->sc_file = fp;
+
+       ls = layoutstateid(stp);
+       INIT_LIST_HEAD(&ls->ls_perclnt);
+       INIT_LIST_HEAD(&ls->ls_perfile);
+       spin_lock_init(&ls->ls_lock);
+       INIT_LIST_HEAD(&ls->ls_layouts);
+       ls->ls_layout_type = layout_type;
+
+       spin_lock(&clp->cl_lock);
+       stp->sc_type = NFS4_LAYOUT_STID;       /* type is set under cl_lock, together with the list insertion */
+       list_add(&ls->ls_perclnt, &clp->cl_lo_states);
+       spin_unlock(&clp->cl_lock);
+
+       spin_lock(&fp->fi_lock);
+       list_add(&ls->ls_perfile, &fp->fi_lo_states);
+       spin_unlock(&fp->fi_lock);
+
+       return ls;
+}
+
+__be32 /*
+ * Resolve @stateid to a layout stateid for the current file handle.
+ *
+ * @rqstp:       request, used to find the per-net nfsd state
+ * @cstate:      compound state (client and current file handle)
+ * @stateid:     stateid supplied by the client
+ * @create:      when true, an open, lock or delegation stateid is also
+ *               accepted and a new layout stateid is created from it
+ * @layout_type: layout type the operation is for
+ * @lsp:         on success, receives the layout stateid
+ *
+ * Returns 0 with *@lsp set, and the caller is expected to drop the
+ * reference with nfs4_put_stid() when done.  Otherwise returns the lookup
+ * error, nfserr_bad_stateid (file handle mismatch, generation newer than
+ * the server's, or layout type mismatch) or nfserr_jukebox (allocation of
+ * a new layout stateid failed).
+ */
+nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate, stateid_t *stateid,
+               bool create, u32 layout_type, struct nfs4_layout_stateid **lsp)
+{
+       struct nfs4_layout_stateid *ls;
+       struct nfs4_stid *stid;
+       unsigned char typemask = NFS4_LAYOUT_STID;
+       __be32 status;
+
+       if (create)
+               typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
+
+       status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
+                       net_generic(SVC_NET(rqstp), nfsd_net_id));
+       if (status)
+               goto out;
+
+       if (!fh_match(&cstate->current_fh.fh_handle,
+                     &stid->sc_file->fi_fhandle)) {   /* stateid must belong to the current file */
+               status = nfserr_bad_stateid;
+               goto out_put_stid;
+       }
+
+       if (stid->sc_type != NFS4_LAYOUT_STID) {       /* only reachable when @create widened the typemask */
+               ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
+               nfs4_put_stid(stid);   /* the parent stateid is no longer needed, whether or not allocation worked */
+
+               status = nfserr_jukebox;
+               if (!ls)
+                       goto out;
+       } else {
+               ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
+
+               status = nfserr_bad_stateid;
+               if (stateid->si_generation > stid->sc_stateid.si_generation)   /* client claims a generation the server never issued */
+                       goto out_put_stid;
+               if (layout_type != ls->ls_layout_type)
+                       goto out_put_stid;
+       }
+
+       *lsp = ls;
+       return 0;
+
+out_put_stid:
+       nfs4_put_stid(stid);
+out:
+       return status;
+}
+
+/*
+ * Return the exclusive end offset of @seg, saturating at NFS4_MAX_UINT64
+ * when offset + length wraps around.
+ */
+static inline u64
+layout_end(struct nfsd4_layout_seg *seg)
+{
+       u64 sum = seg->offset + seg->length;
+
+       if (sum < seg->offset)
+               return NFS4_MAX_UINT64;
+       return sum;
+}
+
+/*
+ * Recompute the length of @lo so that the segment ends at @end.  An @end of
+ * NFS4_MAX_UINT64 is stored as a length of NFS4_MAX_UINT64 rather than as a
+ * difference from the offset.
+ */
+static void
+layout_update_len(struct nfsd4_layout_seg *lo, u64 end)
+{
+       lo->length = (end != NFS4_MAX_UINT64) ?
+                       end - lo->offset : NFS4_MAX_UINT64;
+}
+
+/*
+ * Tell whether the tracked layout @lo is affected by the range @s.
+ *
+ * The iomode of @s must be IOMODE_ANY or equal to the layout's iomode, and
+ * the two byte ranges must intersect.
+ */
+static bool
+layouts_overlapping(struct nfs4_layout *lo, struct nfsd4_layout_seg *s)
+{
+       bool iomode_ok = s->iomode == IOMODE_ANY ||
+                        s->iomode == lo->lo_seg.iomode;
+
+       return iomode_ok &&
+              layout_end(&lo->lo_seg) > s->offset &&
+              layout_end(s) > lo->lo_seg.offset;
+}
+
+/*
+ * Try to grow the tracked segment @lo so that it also covers @new.
+ *
+ * Merging is only possible when both segments have the same iomode and
+ * their byte ranges overlap or touch.  On success @lo is extended to the
+ * union of the two ranges and true is returned; otherwise @lo is left
+ * untouched and false is returned.
+ */
+static bool
+layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new)
+{
+       /*
+        * Sample both ends before touching lo->offset: layout_end(lo) is
+        * computed from offset + length, so evaluating it after the offset
+        * has been lowered would yield an end that is too small and shrink
+        * the merged segment.
+        */
+       u64 lo_end = layout_end(lo);
+       u64 new_end = layout_end(new);
+
+       if (lo->iomode != new->iomode)
+               return false;
+       if (new_end < lo->offset)
+               return false;
+       if (lo_end < new->offset)
+               return false;
+
+       lo->offset = min(lo->offset, new->offset);
+       layout_update_len(lo, max(lo_end, new_end));
+       return true;
+}
+
+__be32 /*
+ * Record the segment granted by a LAYOUTGET under the layout stateid @ls.
+ *
+ * The segment in @lgp is merged into an existing layout when possible;
+ * otherwise a new nfs4_layout is allocated and appended to ls_layouts.  In
+ * both cases the stateid is bumped and copied back into lgp->lg_sid.
+ *
+ * Returns nfs_ok, or nfserr_jukebox when the allocation fails.
+ */
+nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
+{
+       struct nfsd4_layout_seg *seg = &lgp->lg_seg;
+       struct nfs4_layout *lp, *new = NULL;
+
+       spin_lock(&ls->ls_lock);
+       list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
+               if (layouts_try_merge(&lp->lo_seg, seg))
+                       goto done;
+       }
+       spin_unlock(&ls->ls_lock);     /* the allocation below is done without the spinlock held */
+
+       new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
+       if (!new)
+               return nfserr_jukebox;
+       memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
+       new->lo_state = ls;
+
+       spin_lock(&ls->ls_lock);
+       list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {        /* retry: the list may have changed while unlocked */
+               if (layouts_try_merge(&lp->lo_seg, seg))
+                       goto done;
+       }
+
+       atomic_inc(&ls->ls_stid.sc_count);     /* each listed layout holds a stateid reference, dropped in nfsd4_free_layouts() */
+       list_add_tail(&new->lo_perstate, &ls->ls_layouts);
+       new = NULL;    /* now owned by the list; must not be freed below */
+done:
+       update_stateid(&ls->ls_stid.sc_stateid);
+       memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
+       spin_unlock(&ls->ls_lock);
+       if (new)       /* allocated, but the second pass merged instead */
+               kmem_cache_free(nfs4_layout_cache, new);
+       return nfs_ok;
+}
+
+/*
+ * Dispose of every layout collected on @reaplist: each entry is unlinked,
+ * the stateid reference it held is dropped, and the layout is returned to
+ * nfs4_layout_cache.
+ */
+static void
+nfsd4_free_layouts(struct list_head *reaplist)
+{
+       struct nfs4_layout *victim, *next;
+
+       list_for_each_entry_safe(victim, next, reaplist, lo_perstate) {
+               list_del(&victim->lo_perstate);
+               nfs4_put_stid(&victim->lo_state->ls_stid);
+               kmem_cache_free(nfs4_layout_cache, victim);
+       }
+}
+
+/*
+ * Apply a returned byte range to one tracked layout segment.
+ *
+ * @lp:       layout whose segment overlaps the returned range
+ * @seg:      range the client is returning
+ * @reaplist: list that completely returned layouts are moved onto
+ *
+ * Four cases are distinguished:
+ *  - the range covers the whole segment: the layout is queued for freeing;
+ *  - the range covers the head of the segment: the segment is trimmed to
+ *    start where the returned range ends;
+ *  - the range covers the tail of the segment: the segment is trimmed to
+ *    end where the returned range starts;
+ *  - the range lies strictly inside the segment: splitting is not
+ *    supported, so the segment is kept unchanged.
+ */
+static void
+nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
+               struct list_head *reaplist)
+{
+       struct nfsd4_layout_seg *lo = &lp->lo_seg;
+       u64 end = layout_end(lo);
+
+       if (seg->offset <= lo->offset) {
+               if (layout_end(seg) >= end) {
+                       /* The whole segment has been returned. */
+                       list_move_tail(&lp->lo_perstate, reaplist);
+                       return;
+               }
+               /* Head returned: what remains is [layout_end(seg), end). */
+               lo->offset = layout_end(seg);
+       } else {
+               /* retain the whole layout segment on a split. */
+               if (layout_end(seg) < end) {
+                       dprintk("%s: split not supported\n", __func__);
+                       return;
+               }
+               /* Tail returned: what remains is [lo->offset, seg->offset). */
+               end = seg->offset;
+       }
+
+       layout_update_len(lo, end);
+}
+
+__be32 /*
+ * Handle a LAYOUTRETURN of type RETURN_FILE.
+ *
+ * The layout stateid in @lrp is looked up (never created), every layout
+ * overlapping lrp->lr_seg is trimmed or queued for freeing, and the reply
+ * fields are filled in: when layouts remain, lrs_present is set and, if
+ * anything matched, the bumped stateid is copied to lr_sid; when none
+ * remain, the stateid is unhashed and lrs_present is cleared.
+ *
+ * Returns nfs_ok, or the error from the stateid lookup.
+ */
+nfsd4_return_file_layouts(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_layoutreturn *lrp)
+{
+       struct nfs4_layout_stateid *ls;
+       struct nfs4_layout *lp, *n;
+       LIST_HEAD(reaplist);
+       __be32 nfserr;
+       int found = 0;
+
+       nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lrp->lr_sid,
+                                               false, lrp->lr_layout_type,
+                                               &ls);
+       if (nfserr)
+               return nfserr;
+
+       spin_lock(&ls->ls_lock);
+       list_for_each_entry_safe(lp, n, &ls->ls_layouts, lo_perstate) {        /* _safe: entries may be moved to reaplist */
+               if (layouts_overlapping(lp, &lrp->lr_seg)) {
+                       nfsd4_return_file_layout(lp, &lrp->lr_seg, &reaplist);
+                       found++;
+               }
+       }
+       if (!list_empty(&ls->ls_layouts)) {
+               if (found) {
+                       update_stateid(&ls->ls_stid.sc_stateid);
+                       memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
+                               sizeof(stateid_t));
+               }
+               lrp->lrs_present = 1;
+       } else {
+               nfs4_unhash_stid(&ls->ls_stid);        /* no layouts left under this stateid */
+               lrp->lrs_present = 0;
+       }
+       spin_unlock(&ls->ls_lock);
+
+       nfs4_put_stid(&ls->ls_stid);   /* reference obtained by nfsd4_preprocess_layout_stateid() */
+       nfsd4_free_layouts(&reaplist); /* done after ls_lock is released; drops one stateid reference per layout */
+       return nfs_ok;
+}
+
+__be32 /*
+ * Handle a LAYOUTRETURN of type RETURN_FSID or RETURN_ALL.
+ *
+ * Walks every layout stateid of the client in @cstate.  For RETURN_FSID,
+ * stateids whose file is not on the same fsid as the current file handle
+ * are skipped.  Layouts whose iomode matches lrp->lr_seg.iomode (or all of
+ * them for IOMODE_ANY) are collected and freed once the locks have been
+ * released.
+ *
+ * Always clears lrp->lrs_present and returns 0.  @rqstp is not used.
+ */
+nfsd4_return_client_layouts(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_layoutreturn *lrp)
+{
+       struct nfs4_layout_stateid *ls, *n;
+       struct nfs4_client *clp = cstate->clp;
+       struct nfs4_layout *lp, *t;
+       LIST_HEAD(reaplist);
+
+       lrp->lrs_present = 0;
+
+       spin_lock(&clp->cl_lock);
+       list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
+               if (lrp->lr_return_type == RETURN_FSID &&
+                   !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
+                                  &cstate->current_fh.fh_handle))
+                       continue;
+
+               spin_lock(&ls->ls_lock);       /* ls_lock nests inside cl_lock here */
+               list_for_each_entry_safe(lp, t, &ls->ls_layouts, lo_perstate) {
+                       if (lrp->lr_seg.iomode == IOMODE_ANY ||
+                           lrp->lr_seg.iomode == lp->lo_seg.iomode)
+                               list_move_tail(&lp->lo_perstate, &reaplist);
+               }
+               spin_unlock(&ls->ls_lock);
+       }
+       spin_unlock(&clp->cl_lock);
+
+       nfsd4_free_layouts(&reaplist); /* frees the collected layouts outside both locks */
+       return 0;
+}
+
+static void    /*
+ * Move every layout held under @ls onto @reaplist, leaving ls_layouts
+ * empty.  The splice is done under ls_lock; freeing the layouts is left to
+ * the caller.
+ */
+nfsd4_return_all_layouts(struct nfs4_layout_stateid *ls,
+               struct list_head *reaplist)
+{
+       spin_lock(&ls->ls_lock);
+       list_splice_init(&ls->ls_layouts, reaplist);
+       spin_unlock(&ls->ls_lock);
+}
+
+/*
+ * Drop every layout held by @clp, across all of its layout stateids.
+ *
+ * The layouts are gathered under cl_lock and only freed after the lock has
+ * been released.
+ */
+void
+nfsd4_return_all_client_layouts(struct nfs4_client *clp)
+{
+       struct nfs4_layout_stateid *cur, *tmp;
+       LIST_HEAD(doomed);
+
+       spin_lock(&clp->cl_lock);
+       list_for_each_entry_safe(cur, tmp, &clp->cl_lo_states, ls_perclnt) {
+               nfsd4_return_all_layouts(cur, &doomed);
+       }
+       spin_unlock(&clp->cl_lock);
+
+       nfsd4_free_layouts(&doomed);
+}
+
+/*
+ * Drop every layout that @clp holds on the file @fp.
+ *
+ * The file's layout stateids are scanned under fi_lock; those belonging to
+ * other clients are skipped.  The gathered layouts are freed after the
+ * lock has been released.
+ */
+void
+nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+       struct nfs4_layout_stateid *cur, *tmp;
+       LIST_HEAD(doomed);
+
+       spin_lock(&fp->fi_lock);
+       list_for_each_entry_safe(cur, tmp, &fp->fi_lo_states, ls_perfile) {
+               if (cur->ls_stid.sc_client != clp)
+                       continue;
+               nfsd4_return_all_layouts(cur, &doomed);
+       }
+       spin_unlock(&fp->fi_lock);
+
+       nfsd4_free_layouts(&doomed);
+}
+
+/*
+ * Set up the pNFS state: initialise the device ID hash buckets and create
+ * the slab caches for layouts and layout stateids.
+ *
+ * Returns 0 on success or -ENOMEM when either cache cannot be created; a
+ * layout cache that was already created is destroyed again in that case.
+ */
+int
+nfsd4_init_pnfs(void)
+{
+       int bucket;
+
+       for (bucket = 0; bucket < DEVID_HASH_SIZE; bucket++)
+               INIT_LIST_HEAD(&nfsd_devid_hash[bucket]);
+
+       nfs4_layout_cache = kmem_cache_create("nfs4_layout",
+                       sizeof(struct nfs4_layout), 0, 0, NULL);
+       if (!nfs4_layout_cache)
+               goto out;
+
+       nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
+                       sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
+       if (!nfs4_layout_stateid_cache)
+               goto out_destroy_layout_cache;
+
+       return 0;
+
+out_destroy_layout_cache:
+       kmem_cache_destroy(nfs4_layout_cache);
+out:
+       return -ENOMEM;
+}
+
+/*
+ * Tear down the pNFS state: destroy both slab caches and free every device
+ * ID map still present in the hash table.
+ */
+void
+nfsd4_exit_pnfs(void)
+{
+       struct nfsd4_deviceid_map *cur, *next;
+       int bucket;
+
+       kmem_cache_destroy(nfs4_layout_cache);
+       kmem_cache_destroy(nfs4_layout_stateid_cache);
+
+       for (bucket = 0; bucket < DEVID_HASH_SIZE; bucket++) {
+               list_for_each_entry_safe(cur, next, &nfsd_devid_hash[bucket],
+                                        hash) {
+                       kfree(cur);
+               }
+       }
+}
 
 #include "current_stateid.h"
 #include "netns.h"
 #include "acl.h"
+#include "pnfs.h"
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
        return status == nfserr_same ? nfs_ok : status;
 }
 
+#ifdef CONFIG_NFSD_PNFS
+/*
+ * Check that @exp offers pNFS with exactly the layout type @layout_type.
+ *
+ * Returns the nfsd4_layout_ops entry for that layout type, or NULL when the
+ * export has no layout type set or the requested type differs from it.
+ */
+static const struct nfsd4_layout_ops *
+nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
+{
+       if (exp->ex_layout_type == 0) {
+               dprintk("%s: export does not support pNFS\n", __func__);
+               return NULL;
+       }
+
+       if (layout_type != exp->ex_layout_type) {
+               dprintk("%s: layout type %d not supported\n",
+                       __func__, layout_type);
+               return NULL;
+       }
+
+       return nfsd4_layout_ops[layout_type];
+}
+
+/*
+ * GETDEVICEINFO: describe the device identified by gdp->gd_devid.
+ *
+ * The device ID's fsid index is mapped back to an export, the export is
+ * checked for the requested layout type, and the layout driver is asked to
+ * fill in the device information unless the client passed a maxcount of
+ * zero.  The requested notification types are masked down to those the
+ * driver supports.
+ *
+ * Returns nfs_ok or the driver's status on success paths, nfserr_noent when
+ * the device ID or its export cannot be found, and
+ * nfserr_layoutunavailable when the export does not offer the layout type.
+ * @cstate is not used.
+ */
+static __be32
+nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       const struct nfsd4_layout_ops *ops;
+       struct nfsd4_deviceid_map *map;
+       struct svc_export *exp;
+       __be32 nfserr;
+
+       dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n",
+              __func__,
+              gdp->gd_layout_type,
+              gdp->gd_devid.fsid_idx, gdp->gd_devid.generation,
+              gdp->gd_maxcount);
+
+       map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx);
+       if (!map) {
+               dprintk("%s: couldn't find device ID to export mapping!\n",
+                       __func__);
+               return nfserr_noent;
+       }
+
+       exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid);
+       if (IS_ERR(exp)) {
+               dprintk("%s: could not find device id\n", __func__);
+               return nfserr_noent;
+       }
+
+       nfserr = nfserr_layoutunavailable;
+       ops = nfsd4_layout_verify(exp, gdp->gd_layout_type);
+       if (!ops)
+               goto out;
+
+       nfserr = nfs_ok;
+       if (gdp->gd_maxcount != 0)
+               nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+
+       gdp->gd_notify_types &= ops->notify_types;
+out:
+       /*
+        * Every path that got an export from rqst_exp_find() must release
+        * it, including the "layout type not available" exit above.
+        */
+       exp_put(exp);
+       return nfserr;
+}
+
+static __be32  /*
+ * LAYOUTGET: grant a layout on the current file handle.
+ *
+ * The requested iomode selects the access mode the file handle is verified
+ * with, the export must offer the requested layout type, and the range
+ * arguments are validated before the layout stateid is looked up or
+ * created.  The layout driver then fills in the layout, and the granted
+ * segment is recorded under the stateid.
+ *
+ * Returns nfs_ok on success; otherwise nfserr_badiomode,
+ * nfserr_layoutunavailable, nfserr_inval, or the status returned by
+ * fh_verify(), the stateid lookup, the driver or nfsd4_insert_layout().
+ */
+nfsd4_layoutget(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_layoutget *lgp)
+{
+       struct svc_fh *current_fh = &cstate->current_fh;
+       const struct nfsd4_layout_ops *ops;
+       struct nfs4_layout_stateid *ls;
+       __be32 nfserr;
+       int accmode;
+
+       switch (lgp->lg_seg.iomode) {
+       case IOMODE_READ:
+               accmode = NFSD_MAY_READ;
+               break;
+       case IOMODE_RW:
+               accmode = NFSD_MAY_READ | NFSD_MAY_WRITE;
+               break;
+       default:
+               dprintk("%s: invalid iomode %d\n",
+                       __func__, lgp->lg_seg.iomode);
+               nfserr = nfserr_badiomode;
+               goto out;
+       }
+
+       nfserr = fh_verify(rqstp, current_fh, 0, accmode);
+       if (nfserr)
+               goto out;
+
+       nfserr = nfserr_layoutunavailable;
+       ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type);
+       if (!ops)
+               goto out;
+
+       /*
+        * Verify minlength and range as per RFC5661:
+        *  o  If loga_length is less than loga_minlength,
+        *     the metadata server MUST return NFS4ERR_INVAL.
+        *  o  If the sum of loga_offset and loga_minlength exceeds
+        *     NFS4_UINT64_MAX, and loga_minlength is not
+        *     NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result.
+        *  o  If the sum of loga_offset and loga_length exceeds
+        *     NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX,
+        *     the error NFS4ERR_INVAL MUST result.
+        */
+       nfserr = nfserr_inval;
+       if (lgp->lg_seg.length < lgp->lg_minlength ||
+           (lgp->lg_minlength != NFS4_MAX_UINT64 &&
+            lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) ||      /* written as a subtraction so the sum itself is never formed */
+           (lgp->lg_seg.length != NFS4_MAX_UINT64 &&
+            lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset))
+               goto out;
+       if (lgp->lg_seg.length == 0)   /* an empty range is rejected as well */
+               goto out;
+
+       nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
+                                               true, lgp->lg_layout_type, &ls);
+       if (nfserr)
+               goto out;
+
+       nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
+                                    current_fh, lgp);
+       if (nfserr)
+               goto out_put_stid;
+
+       nfserr = nfsd4_insert_layout(lgp, ls);
+
+out_put_stid:  /* reached on success too: drops the reference from the stateid lookup */
+       nfs4_put_stid(&ls->ls_stid);
+out:
+       return nfserr;
+}
+
+static __be32  /*
+ * LAYOUTCOMMIT: commit changes made through a layout on the current file.
+ *
+ * After verifying write access and the layout type, the last-written
+ * offset is checked against the committed segment and, when the client did
+ * not supply a new offset, against the current file size.  The layout
+ * stateid must already exist.  The layout driver performs the commit, and
+ * the reply reports whether the file size changed.
+ *
+ * Returns nfs_ok on success; otherwise nfserr_layoutunavailable,
+ * nfserr_inval, nfserr_badlayout (in place of nfserr_bad_stateid), or the
+ * status from fh_verify(), the stateid lookup or the driver.
+ */
+nfsd4_layoutcommit(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_layoutcommit *lcp)
+{
+       const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
+       struct svc_fh *current_fh = &cstate->current_fh;
+       const struct nfsd4_layout_ops *ops;
+       loff_t new_size = lcp->lc_last_wr + 1; /* size implied by the last byte written */
+       struct inode *inode;
+       struct nfs4_layout_stateid *ls;
+       __be32 nfserr;
+
+       nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
+       if (nfserr)
+               goto out;
+
+       nfserr = nfserr_layoutunavailable;
+       ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
+       if (!ops)
+               goto out;
+       inode = current_fh->fh_dentry->d_inode;
+
+       nfserr = nfserr_inval;
+       if (new_size <= seg->offset) {
+               dprintk("pnfsd: last write before layout segment\n");
+               goto out;
+       }
+       if (new_size > seg->offset + seg->length) {    /* NOTE(review): this sum looks like it can wrap for very large lengths - confirm */
+               dprintk("pnfsd: last write beyond layout segment\n");
+               goto out;
+       }
+       if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
+               dprintk("pnfsd: layoutcommit beyond EOF\n");
+               goto out;
+       }
+
+       nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
+                                               false, lcp->lc_layout_type,
+                                               &ls);
+       if (nfserr) {
+               /* fixup error code as per RFC5661 */
+               if (nfserr == nfserr_bad_stateid)
+                       nfserr = nfserr_badlayout;
+               goto out;
+       }
+
+       nfserr = ops->proc_layoutcommit(inode, lcp);
+       if (nfserr)
+               goto out_put_stid;
+
+       if (new_size > i_size_read(inode)) {
+               lcp->lc_size_chg = 1;
+               lcp->lc_newsize = new_size;
+       } else {
+               lcp->lc_size_chg = 0;
+       }
+
+out_put_stid:  /* reached on success too: drops the reference from the stateid lookup */
+       nfs4_put_stid(&ls->ls_stid);
+out:
+       return nfserr;
+}
+
+/*
+ * LAYOUTRETURN: give back layouts for a file, an fsid or the whole client.
+ *
+ * The current file handle and the layout type are verified first, then the
+ * iomode is validated and the request is dispatched on its return type.
+ *
+ * Returns the status of the per-type handler, nfserr_layoutunavailable
+ * when the export does not offer the layout type, nfserr_inval for an
+ * unknown iomode or return type, or the status from fh_verify().
+ */
+static __be32
+nfsd4_layoutreturn(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_layoutreturn *lrp)
+{
+       struct svc_fh *current_fh = &cstate->current_fh;
+       __be32 nfserr;
+
+       nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
+       if (nfserr)
+               return nfserr;
+
+       if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type))
+               return nfserr_layoutunavailable;
+
+       if (lrp->lr_seg.iomode != IOMODE_READ &&
+           lrp->lr_seg.iomode != IOMODE_RW &&
+           lrp->lr_seg.iomode != IOMODE_ANY) {
+               dprintk("%s: invalid iomode %d\n", __func__,
+                       lrp->lr_seg.iomode);
+               return nfserr_inval;
+       }
+
+       if (lrp->lr_return_type == RETURN_FILE)
+               return nfsd4_return_file_layouts(rqstp, cstate, lrp);
+
+       if (lrp->lr_return_type == RETURN_FSID ||
+           lrp->lr_return_type == RETURN_ALL)
+               return nfsd4_return_client_layouts(rqstp, cstate, lrp);
+
+       dprintk("%s: invalid return_type %d\n", __func__,
+               lrp->lr_return_type);
+       return nfserr_inval;
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 /*
  * NULL call.
  */
                op_encode_channel_attrs_maxsz) * sizeof(__be32);
 }
 
+#ifdef CONFIG_NFSD_PNFS
+/*
+ * At this stage we don't really know what layout driver will handle the request,
+ * so we need to define an arbitrary upper bound here.
+ */
+#define MAX_LAYOUT_SIZE                128
+/*
+ * Upper bound, in bytes, for an encoded LAYOUTGET reply: header, the
+ * return-on-close flag, the stateid, the layout count and MAX_LAYOUT_SIZE
+ * words reserved for the layout itself.
+ */
+static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       u32 words = op_encode_hdr_size;
+
+       words += 1;                             /* logr_return_on_close */
+       words += op_encode_stateid_maxsz;
+       words += 1;                             /* nr of layouts */
+       words += MAX_LAYOUT_SIZE;
+
+       return words * sizeof(__be32);
+}
+
+/*
+ * Upper bound, in bytes, for an encoded LAYOUTCOMMIT reply: header, the
+ * new-size flag and the two-word size.
+ */
+static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       u32 words = op_encode_hdr_size;
+
+       words += 1;                             /* locr_newsize */
+       words += 2;                             /* ns_size */
+
+       return words * sizeof(__be32);
+}
+
+/*
+ * Upper bound, in bytes, for an encoded LAYOUTRETURN reply: header, the
+ * stateid-present flag and the stateid.
+ */
+static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       u32 words = op_encode_hdr_size;
+
+       words += 1;                             /* lrs_stateid */
+       words += op_encode_stateid_maxsz;
+
+       return words * sizeof(__be32);
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 static struct nfsd4_operation nfsd4_ops[] = {
        [OP_ACCESS] = {
                .op_func = (nfsd4op_func)nfsd4_access,
                .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
                .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
+#ifdef CONFIG_NFSD_PNFS
+       [OP_GETDEVICEINFO] = {
+               .op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
+               .op_flags = ALLOWED_WITHOUT_FH,
+               .op_name = "OP_GETDEVICEINFO",
+       },
+       [OP_LAYOUTGET] = {
+               .op_func = (nfsd4op_func)nfsd4_layoutget,
+               .op_flags = OP_MODIFIES_SOMETHING,
+               .op_name = "OP_LAYOUTGET",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
+       },
+       [OP_LAYOUTCOMMIT] = {
+               .op_func = (nfsd4op_func)nfsd4_layoutcommit,
+               .op_flags = OP_MODIFIES_SOMETHING,
+               .op_name = "OP_LAYOUTCOMMIT",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
+       },
+       [OP_LAYOUTRETURN] = {
+               .op_func = (nfsd4op_func)nfsd4_layoutreturn,
+               .op_flags = OP_MODIFIES_SOMETHING,
+               .op_name = "OP_LAYOUTRETURN",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
+       },
+#endif /* CONFIG_NFSD_PNFS */
 
        /* NFSv4.2 operations */
        [OP_ALLOCATE] = {
 
 #include "current_stateid.h"
 
 #include "netns.h"
+#include "pnfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
        INIT_LIST_HEAD(&clp->cl_lru);
        INIT_LIST_HEAD(&clp->cl_callbacks);
        INIT_LIST_HEAD(&clp->cl_revoked);
+#ifdef CONFIG_NFSD_PNFS
+       INIT_LIST_HEAD(&clp->cl_lo_states);
+#endif
        spin_lock_init(&clp->cl_lock);
        rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
        return clp;
                nfs4_get_stateowner(&oo->oo_owner);
                release_openowner(oo);
        }
+       nfsd4_return_all_client_layouts(clp);
        nfsd4_shutdown_callback(clp);
        if (clp->cl_cb_conn.cb_xprt)
                svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 static void
 nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
 {
-       /* pNFS is not supported */
+#ifdef CONFIG_NFSD_PNFS
+       new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
+#else
        new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
+#endif
 
        /* Referrals are supported, Migration is not. */
        new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
        fp->fi_share_deny = 0;
        memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
        memset(fp->fi_access, 0, sizeof(fp->fi_access));
+#ifdef CONFIG_NFSD_PNFS
+       INIT_LIST_HEAD(&fp->fi_lo_states);
+#endif
        hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
 }
 
        update_stateid(&stp->st_stid.sc_stateid);
        memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
+       nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
+                                     stp->st_stid.sc_file);
+
        nfsd4_close_open_stateid(stp);
 
        /* put reference from nfs4_preprocess_seqid_op */
 
 #include "state.h"
 #include "cache.h"
 #include "netns.h"
+#include "pnfs.h"
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
        DECODE_TAIL;
 }
 
+#ifdef CONFIG_NFSD_PNFS
+/*
+ * Decode the arguments of GETDEVICEINFO: device ID, layout type, maxcount
+ * and the notification bitmap.
+ *
+ * Only the first bitmap word is kept in gd_notify_types; any further word
+ * must be zero, otherwise nfserr_inval is returned.  An empty bitmap
+ * leaves gd_notify_types at zero.
+ */
+static __be32
+nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+               struct nfsd4_getdeviceinfo *gdev)
+{
+       DECODE_HEAD;
+       u32 num, i;
+
+       READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
+       COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
+       gdev->gd_layout_type = be32_to_cpup(p++);
+       gdev->gd_maxcount = be32_to_cpup(p++);
+       num = be32_to_cpup(p++);
+
+       /* Do not leave the field unset when the client sends no bitmap. */
+       gdev->gd_notify_types = 0;
+       if (num) {
+               /*
+                * The word count comes straight off the wire.  Reject
+                * counts for which 4 * num would wrap in 32 bits: the
+                * READ_BUF() below would then check far fewer bytes than
+                * the loop goes on to read.
+                */
+               if (num > (u32)~0U / 4) {
+                       status = nfserr_inval;
+                       goto out;
+               }
+               READ_BUF(4 * num);
+               gdev->gd_notify_types = be32_to_cpup(p++);
+               for (i = 1; i < num; i++) {
+                       if (be32_to_cpup(p++)) {
+                               status = nfserr_inval;
+                               goto out;
+                       }
+               }
+       }
+       DECODE_TAIL;
+}
+
+/*
+ * Decode the arguments of LAYOUTGET: signal flag, layout type, iomode,
+ * offset, length, minimum length, stateid and maxcount.
+ */
+static __be32
+nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+               struct nfsd4_layoutget *lgp)
+{
+       DECODE_HEAD;
+
+       READ_BUF(36);
+       lgp->lg_signal = be32_to_cpup(p++);
+       lgp->lg_layout_type = be32_to_cpup(p++);
+       lgp->lg_seg.iomode = be32_to_cpup(p++);
+       p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
+       p = xdr_decode_hyper(p, &lgp->lg_seg.length);
+       p = xdr_decode_hyper(p, &lgp->lg_minlength);
+
+       /*
+        * A stateid that fails to decode must abort the operation instead
+        * of being used half-initialised; handled the same way as a failed
+        * nfsd4_decode_time() in the LAYOUTCOMMIT decoder.
+        */
+       status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
+       if (status)
+               return status;
+
+       READ_BUF(4);
+       lgp->lg_maxcount = be32_to_cpup(p++);
+
+       DECODE_TAIL;
+}
+
+/*
+ * Decode the arguments of LAYOUTCOMMIT: segment, reclaim flag, stateid,
+ * optional last-written offset, optional modification time, layout type
+ * and the opaque layout update.
+ *
+ * When no last-written offset is supplied lc_last_wr is set to 0; when no
+ * time is supplied lc_mtime.tv_nsec is set to UTIME_NOW.  The layout update
+ * body is not interpreted here.
+ */
+static __be32
+nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+               struct nfsd4_layoutcommit *lcp)
+{
+       DECODE_HEAD;
+       u32 timechange;
+
+       READ_BUF(20);
+       p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
+       p = xdr_decode_hyper(p, &lcp->lc_seg.length);
+       lcp->lc_reclaim = be32_to_cpup(p++);
+
+       /* Do not carry on with a stateid that failed to decode. */
+       status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
+       if (status)
+               return status;
+
+       READ_BUF(4);
+       lcp->lc_newoffset = be32_to_cpup(p++);
+       if (lcp->lc_newoffset) {
+               READ_BUF(8);
+               p = xdr_decode_hyper(p, &lcp->lc_last_wr);
+       } else
+               lcp->lc_last_wr = 0;
+       READ_BUF(4);
+       timechange = be32_to_cpup(p++);
+       if (timechange) {
+               status = nfsd4_decode_time(argp, &lcp->lc_mtime);
+               if (status)
+                       return status;
+       } else {
+               lcp->lc_mtime.tv_nsec = UTIME_NOW;
+       }
+       READ_BUF(8);
+       lcp->lc_layout_type = be32_to_cpup(p++);
+
+       /*
+        * Save the layout update in XDR format and let the layout driver deal
+        * with it later.
+        */
+       lcp->lc_up_len = be32_to_cpup(p++);
+       if (lcp->lc_up_len > 0) {
+               READ_BUF(lcp->lc_up_len);
+               READMEM(lcp->lc_up_layout, lcp->lc_up_len);
+       }
+
+       DECODE_TAIL;
+}
+
+/*
+ * Decode the arguments of a LAYOUTRETURN operation.
+ *
+ * For RETURN_FILE the byte range, stateid and opaque layout-type specific
+ * body are read from the request.  For every other return type the segment
+ * is set to cover the whole file (offset 0, length NFS4_MAX_UINT64) and no
+ * stateid or body is decoded.  A failure to decode the stateid is returned
+ * to the caller.
+ */
+static __be32
+nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+               struct nfsd4_layoutreturn *lrp)
+{
+       DECODE_HEAD;
+
+       READ_BUF(16);
+       lrp->lr_reclaim = be32_to_cpup(p++);
+       lrp->lr_layout_type = be32_to_cpup(p++);
+       lrp->lr_seg.iomode = be32_to_cpup(p++);
+       lrp->lr_return_type = be32_to_cpup(p++);
+       if (lrp->lr_return_type == RETURN_FILE) {
+               READ_BUF(16);
+               p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
+               p = xdr_decode_hyper(p, &lrp->lr_seg.length);
+
+               /* Do not keep decoding past an unreadable stateid. */
+               status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
+               if (status)
+                       return status;
+
+               READ_BUF(4);
+               lrp->lrf_body_len = be32_to_cpup(p++);
+               if (lrp->lrf_body_len > 0) {
+                       READ_BUF(lrp->lrf_body_len);
+                       READMEM(lrp->lrf_body, lrp->lrf_body_len);
+               }
+       } else {
+               lrp->lr_seg.offset = 0;
+               lrp->lr_seg.length = NFS4_MAX_UINT64;
+       }
+
+       DECODE_TAIL;
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 static __be32
 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
                       struct nfsd4_fallocate *fallocate)
        [OP_DESTROY_SESSION]    = (nfsd4_dec)nfsd4_decode_destroy_session,
        [OP_FREE_STATEID]       = (nfsd4_dec)nfsd4_decode_free_stateid,
        [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+#ifdef CONFIG_NFSD_PNFS
+       [OP_GETDEVICEINFO]      = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
+       [OP_GETDEVICELIST]      = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_LAYOUTCOMMIT]       = (nfsd4_dec)nfsd4_decode_layoutcommit,
+       [OP_LAYOUTGET]          = (nfsd4_dec)nfsd4_decode_layoutget,
+       [OP_LAYOUTRETURN]       = (nfsd4_dec)nfsd4_decode_layoutreturn,
+#else
        [OP_GETDEVICEINFO]      = (nfsd4_dec)nfsd4_decode_notsupp,
        [OP_GETDEVICELIST]      = (nfsd4_dec)nfsd4_decode_notsupp,
        [OP_LAYOUTCOMMIT]       = (nfsd4_dec)nfsd4_decode_notsupp,
        [OP_LAYOUTGET]          = (nfsd4_dec)nfsd4_decode_notsupp,
        [OP_LAYOUTRETURN]       = (nfsd4_dec)nfsd4_decode_notsupp,
+#endif
        [OP_SECINFO_NO_NAME]    = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
        [OP_SEQUENCE]           = (nfsd4_dec)nfsd4_decode_sequence,
        [OP_SET_SSV]            = (nfsd4_dec)nfsd4_decode_notsupp,
                        get_parent_attributes(exp, &stat);
                p = xdr_encode_hyper(p, stat.ino);
        }
+#ifdef CONFIG_NFSD_PNFS
+       if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) ||
+           (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) {
+               if (exp->ex_layout_type) {
+                       p = xdr_reserve_space(xdr, 8);
+                       if (!p)
+                               goto out_resource;
+                       *p++ = cpu_to_be32(1);
+                       *p++ = cpu_to_be32(exp->ex_layout_type);
+               } else {
+                       p = xdr_reserve_space(xdr, 4);
+                       if (!p)
+                               goto out_resource;
+                       *p++ = cpu_to_be32(0);
+               }
+       }
+
+       if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
+               p = xdr_reserve_space(xdr, 4);
+               if (!p)
+                       goto out_resource;
+               *p++ = cpu_to_be32(stat.blksize);
+       }
+#endif /* CONFIG_NFSD_PNFS */
        if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
                status = nfsd4_encode_security_label(xdr, rqstp, context,
                                                                contextlen);
        return nfserr;
 }
 
+#ifdef CONFIG_NFSD_PNFS
+/*
+ * Encode the reply to a GETDEVICEINFO operation.
+ *
+ * On success the reply carries the layout type, the driver-encoded device
+ * address (skipped when the client passed gd_maxcount == 0) and the
+ * notification bitmap.  If the driver's encoder fails and the reply would
+ * exceed gd_maxcount, the partial encoding is discarded and the required
+ * size is returned together with nfserr_toosmall.
+ *
+ * gd_device is freed on every path, including when @nfserr is already set.
+ */
+static __be32
+nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+               struct nfsd4_getdeviceinfo *gdev)
+{
+       struct xdr_stream *xdr = &resp->xdr;
+       const struct nfsd4_layout_ops *ops;
+       u32 starting_len = xdr->buf->len, needed_len;
+       __be32 *p;
+
+       dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr));
+       if (nfserr)
+               goto out;
+
+       nfserr = nfserr_resource;
+       p = xdr_reserve_space(xdr, 4);
+       if (!p)
+               goto out;
+
+       *p++ = cpu_to_be32(gdev->gd_layout_type);
+
+       /* If maxcount is 0 then just update notifications */
+       if (gdev->gd_maxcount != 0) {
+               /*
+                * Only index nfsd4_layout_ops[] once the operation is known
+                * to have succeeded; on an error path gd_layout_type may
+                * still be an unchecked value from the client.
+                */
+               ops = nfsd4_layout_ops[gdev->gd_layout_type];
+               nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+               if (nfserr) {
+                       /*
+                        * We don't bother to burden the layout drivers with
+                        * enforcing gd_maxcount, just tell the client to
+                        * come back with a bigger buffer if it's not enough.
+                        */
+                       if (xdr->buf->len + 4 > gdev->gd_maxcount)
+                               goto toosmall;
+                       goto out;
+               }
+       }
+
+       nfserr = nfserr_resource;
+       if (gdev->gd_notify_types) {
+               p = xdr_reserve_space(xdr, 4 + 4);
+               if (!p)
+                       goto out;
+               *p++ = cpu_to_be32(1);                  /* bitmap length */
+               *p++ = cpu_to_be32(gdev->gd_notify_types);
+       } else {
+               p = xdr_reserve_space(xdr, 4);
+               if (!p)
+                       goto out;
+               *p++ = 0;
+       }
+
+       nfserr = 0;
+out:
+       kfree(gdev->gd_device);
+       dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr));
+       return nfserr;
+
+toosmall:
+       dprintk("%s: maxcount too small\n", __func__);
+       needed_len = xdr->buf->len + 4 /* notifications */;
+       /* Drop everything encoded so far and report the size needed. */
+       xdr_truncate_encode(xdr, starting_len);
+       p = xdr_reserve_space(xdr, 4);
+       if (!p) {
+               nfserr = nfserr_resource;
+       } else {
+               *p++ = cpu_to_be32(needed_len);
+               nfserr = nfserr_toosmall;
+       }
+       goto out;
+}
+
+/*
+ * Encode the reply to a LAYOUTGET operation.
+ *
+ * Emits the return-on-close flag (always set), the layout stateid and a
+ * single layout segment header, then lets the layout driver encode the
+ * layout-type specific content.
+ *
+ * lg_content is freed on every path, including when @nfserr is already set.
+ */
+static __be32
+nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
+               struct nfsd4_layoutget *lgp)
+{
+       struct xdr_stream *xdr = &resp->xdr;
+       const struct nfsd4_layout_ops *ops;
+       __be32 *p;
+
+       dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr));
+       if (nfserr)
+               goto out;
+
+       nfserr = nfserr_resource;
+       p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t));
+       if (!p)
+               goto out;
+
+       *p++ = cpu_to_be32(1);  /* we always set return-on-close */
+       *p++ = cpu_to_be32(lgp->lg_sid.si_generation);
+       p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque,
+                                   sizeof(stateid_opaque_t));
+
+       *p++ = cpu_to_be32(1);  /* we always return a single layout */
+       p = xdr_encode_hyper(p, lgp->lg_seg.offset);
+       p = xdr_encode_hyper(p, lgp->lg_seg.length);
+       *p++ = cpu_to_be32(lgp->lg_seg.iomode);
+       *p++ = cpu_to_be32(lgp->lg_layout_type);
+
+       /*
+        * Only index nfsd4_layout_ops[] once the operation is known to have
+        * succeeded; on an error path lg_layout_type may still be an
+        * unchecked value from the client.
+        */
+       ops = nfsd4_layout_ops[lgp->lg_layout_type];
+       nfserr = ops->encode_layoutget(xdr, lgp);
+out:
+       kfree(lgp->lg_content);
+       return nfserr;
+}
+
+/*
+ * Encode the reply to a LAYOUTCOMMIT operation: a "size changed" flag,
+ * followed by the new file size only when that flag is set.  An incoming
+ * error status is passed through untouched.
+ */
+static __be32
+nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
+                         struct nfsd4_layoutcommit *lcp)
+{
+       struct xdr_stream *stream = &resp->xdr;
+       __be32 *wp;
+
+       if (nfserr)
+               return nfserr;
+
+       wp = xdr_reserve_space(stream, 4);
+       if (!wp)
+               return nfserr_resource;
+       *wp = cpu_to_be32(lcp->lc_size_chg);
+
+       /* Nothing more to send unless the file size changed. */
+       if (!lcp->lc_size_chg)
+               return nfs_ok;
+
+       wp = xdr_reserve_space(stream, 8);
+       if (!wp)
+               return nfserr_resource;
+       xdr_encode_hyper(wp, lcp->lc_newsize);
+
+       return nfs_ok;
+}
+
+/*
+ * Encode the reply to a LAYOUTRETURN operation: a "stateid present" flag,
+ * followed by the layout stateid only when that flag is set.  An incoming
+ * error status is passed through untouched, and a failure to encode the
+ * stateid is reported to the caller.
+ */
+static __be32
+nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
+               struct nfsd4_layoutreturn *lrp)
+{
+       struct xdr_stream *xdr = &resp->xdr;
+       __be32 *p;
+
+       if (nfserr)
+               return nfserr;
+
+       p = xdr_reserve_space(xdr, 4);
+       if (!p)
+               return nfserr_resource;
+       *p++ = cpu_to_be32(lrp->lrs_present);
+       if (lrp->lrs_present)
+               return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
+       return nfs_ok;
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
                  struct nfsd4_seek *seek)
        [OP_DESTROY_SESSION]    = (nfsd4_enc)nfsd4_encode_noop,
        [OP_FREE_STATEID]       = (nfsd4_enc)nfsd4_encode_noop,
        [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+#ifdef CONFIG_NFSD_PNFS
+       [OP_GETDEVICEINFO]      = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
+       [OP_GETDEVICELIST]      = (nfsd4_enc)nfsd4_encode_noop,
+       [OP_LAYOUTCOMMIT]       = (nfsd4_enc)nfsd4_encode_layoutcommit,
+       [OP_LAYOUTGET]          = (nfsd4_enc)nfsd4_encode_layoutget,
+       [OP_LAYOUTRETURN]       = (nfsd4_enc)nfsd4_encode_layoutreturn,
+#else
        [OP_GETDEVICEINFO]      = (nfsd4_enc)nfsd4_encode_noop,
        [OP_GETDEVICELIST]      = (nfsd4_enc)nfsd4_encode_noop,
        [OP_LAYOUTCOMMIT]       = (nfsd4_enc)nfsd4_encode_noop,
        [OP_LAYOUTGET]          = (nfsd4_enc)nfsd4_encode_noop,
        [OP_LAYOUTRETURN]       = (nfsd4_enc)nfsd4_encode_noop,
+#endif
        [OP_SECINFO_NO_NAME]    = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
        [OP_SEQUENCE]           = (nfsd4_enc)nfsd4_encode_sequence,
        [OP_SET_SSV]            = (nfsd4_enc)nfsd4_encode_noop,
 
 #include "cache.h"
 #include "state.h"
 #include "netns.h"
+#include "pnfs.h"
 
 /*
  *     We have a single directory with several nodes in it.
        retval = nfsd4_init_slabs();
        if (retval)
                goto out_unregister_pernet;
-       retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+       retval = nfsd4_init_pnfs();
        if (retval)
                goto out_free_slabs;
+       retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+       if (retval)
+               goto out_exit_pnfs;
        nfsd_stat_init();       /* Statistics */
        retval = nfsd_reply_cache_init();
        if (retval)
 out_free_stat:
        nfsd_stat_shutdown();
        nfsd_fault_inject_cleanup();
+out_exit_pnfs:
+       nfsd4_exit_pnfs();
 out_free_slabs:
        nfsd4_free_slabs();
 out_unregister_pernet:
        nfsd_stat_shutdown();
        nfsd_lockd_shutdown();
        nfsd4_free_slabs();
+       nfsd4_exit_pnfs();
        nfsd_fault_inject_cleanup();
        unregister_filesystem(&nfsd_fs_type);
        unregister_pernet_subsys(&nfsd_net_ops);
 
 
 #define NFSD4_SUPPORTED_ATTRS_WORD2 0
 
+/* 4.1 */
+#ifdef CONFIG_NFSD_PNFS
+#define PNFSD_SUPPORTED_ATTRS_WORD1    FATTR4_WORD1_FS_LAYOUT_TYPES
+#define PNFSD_SUPPORTED_ATTRS_WORD2 \
+(FATTR4_WORD2_LAYOUT_BLKSIZE   | FATTR4_WORD2_LAYOUT_TYPES)
+#else
+#define PNFSD_SUPPORTED_ATTRS_WORD1    0
+#define PNFSD_SUPPORTED_ATTRS_WORD2    0
+#endif /* CONFIG_NFSD_PNFS */
+
 #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
        NFSD4_SUPPORTED_ATTRS_WORD0
 
 #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
-       NFSD4_SUPPORTED_ATTRS_WORD1
+       (NFSD4_SUPPORTED_ATTRS_WORD1    | PNFSD_SUPPORTED_ATTRS_WORD1)
 
 #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
-       (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
+       (NFSD4_SUPPORTED_ATTRS_WORD2    | PNFSD_SUPPORTED_ATTRS_WORD2 | \
+        FATTR4_WORD2_SUPPATTR_EXCLCREAT)
 
+/* 4.2 */
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #define NFSD4_2_SECURITY_ATTRS         FATTR4_WORD2_SECURITY_LABEL
 #else
 
--- /dev/null
+#ifndef _FS_NFSD_PNFS_H
+#define _FS_NFSD_PNFS_H 1
+
+#include <linux/exportfs.h>
+#include <linux/nfsd/export.h>
+
+#include "state.h"
+#include "xdr4.h"
+
+struct xdr_stream;
+
+struct nfsd4_deviceid_map {    /* NOTE(review): presumably ties a device ID index to an fsid -- confirm */
+       struct list_head        hash;           /* list linkage; hash chain judging by the name -- confirm */
+       u64                     idx;            /* index of this entry; cf. nfsd4_find_devid_map() */
+       int                     fsid_type;      /* presumably selects the format of fsid[] -- confirm */
+       u32                     fsid[];         /* flexible array member; its length is not fixed here */
+};
+
+struct nfsd4_layout_ops {      /* layout driver hooks; looked up via nfsd4_layout_ops[layout type] */
+       u32             notify_types;   /* presumably the notifications the driver supports -- confirm */
+
+       __be32 (*proc_getdeviceinfo)(struct super_block *sb,
+                       struct nfsd4_getdeviceinfo *gdevp);
+       __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
+                       struct nfsd4_getdeviceinfo *gdevp);     /* called by nfsd4_encode_getdeviceinfo() when gd_maxcount != 0 */
+
+       __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
+                       struct nfsd4_layoutget *lgp);
+       __be32 (*encode_layoutget)(struct xdr_stream *,
+                       struct nfsd4_layoutget *lgp);           /* called by nfsd4_encode_layoutget() after the common header */
+
+       __be32 (*proc_layoutcommit)(struct inode *inode,
+                       struct nfsd4_layoutcommit *lcp);
+};
+
+extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+
+__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate, stateid_t *stateid,
+               bool create, u32 layout_type, struct nfs4_layout_stateid **lsp);
+__be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp,
+               struct nfs4_layout_stateid *ls);
+__be32 nfsd4_return_file_layouts(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_layoutreturn *lrp);
+__be32 nfsd4_return_client_layouts(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate,
+               struct nfsd4_layoutreturn *lrp);
+int nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
+               u32 device_generation);
+struct nfsd4_deviceid_map *nfsd4_find_devid_map(int idx);
+
+#ifdef CONFIG_NFSD_PNFS
+void nfsd4_setup_layout_type(struct svc_export *exp);
+void nfsd4_return_all_client_layouts(struct nfs4_client *);
+void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
+               struct nfs4_file *fp);
+int nfsd4_init_pnfs(void);
+void nfsd4_exit_pnfs(void);
+#else
+static inline void nfsd4_setup_layout_type(struct svc_export *exp)
+{      /* built without CONFIG_NFSD_PNFS: no-op */
+}
+
+static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp)
+{      /* built without CONFIG_NFSD_PNFS: no-op */
+}
+static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
+               struct nfs4_file *fp)
+{      /* built without CONFIG_NFSD_PNFS: no-op */
+}
+static inline void nfsd4_exit_pnfs(void)
+{      /* built without CONFIG_NFSD_PNFS: no-op */
+}
+static inline int nfsd4_init_pnfs(void)
+{      /* built without CONFIG_NFSD_PNFS: nothing to initialise */
+       return 0;       /* always reports success */
+}
+#endif /* CONFIG_NFSD_PNFS */
+#endif /* _FS_NFSD_PNFS_H */
 
 /* For a deleg stateid kept around only to process free_stateid's: */
 #define NFS4_REVOKED_DELEG_STID 16
 #define NFS4_CLOSED_DELEG_STID 32
+#define NFS4_LAYOUT_STID 64
        unsigned char sc_type;
        stateid_t sc_stateid;
        struct nfs4_client *sc_client;
        struct list_head        cl_delegations;
        struct list_head        cl_revoked;     /* unacknowledged, revoked 4.1 state */
        struct list_head        cl_lru;         /* tail queue */
+#ifdef CONFIG_NFSD_PNFS
+       struct list_head        cl_lo_states;   /* outstanding layout states */
+#endif
        struct xdr_netobj       cl_name;        /* id generated by client */
        nfs4_verifier           cl_verifier;    /* generated by client */
        time_t                  cl_time;        /* time of last lease renewal */
        int                     fi_delegees;
        struct knfsd_fh         fi_fhandle;
        bool                    fi_had_conflict;
+#ifdef CONFIG_NFSD_PNFS
+       struct list_head        fi_lo_states;
+#endif
 };
 
 /*
        return container_of(s, struct nfs4_ol_stateid, st_stid);
 }
 
+struct nfs4_layout_stateid {   /* layout state wrapping a generic nfs4_stid */
+       struct nfs4_stid                ls_stid;        /* embedded stid; layoutstateid() maps it back to this struct */
+       struct list_head                ls_perclnt;     /* presumably linked on the client's cl_lo_states -- confirm */
+       struct list_head                ls_perfile;     /* presumably linked on the file's fi_lo_states -- confirm */
+       spinlock_t                      ls_lock;        /* NOTE(review): presumably protects ls_layouts -- confirm */
+       struct list_head                ls_layouts;     /* list head; entry type not visible here */
+       u32                             ls_layout_type; /* layout type of this state */
+};
+
+/* Map an embedded generic stateid back to its enclosing layout stateid. */
+static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
+{
+       struct nfs4_layout_stateid *ls;
+
+       ls = container_of(s, struct nfs4_layout_stateid, ls_stid);
+       return ls;
+}
+
 /* flags for preprocess_seqid_op() */
 #define RD_STATE               0x00000010
 #define WR_STATE               0x00000020
 
        u32 rca_one_fs;
 };
 
+struct nfsd4_deviceid {        /* copied verbatim from the request by nfsd4_decode_getdeviceinfo() */
+       u64                     fsid_idx;       /* presumably an nfsd4_deviceid_map index -- confirm */
+       u32                     generation;     /* cf. device_generation in nfsd4_set_deviceid() */
+       u32                     pad;            /* pads the struct out to 16 bytes */
+};
+
+struct nfsd4_layout_seg {      /* byte range plus I/O mode of a layout */
+       u32                     iomode;         /* I/O mode as sent on the wire */
+       u64                     offset;         /* start of the range */
+       u64                     length;         /* NFS4_MAX_UINT64 is used for non-file LAYOUTRETURNs */
+};
+
+struct nfsd4_getdeviceinfo {   /* GETDEVICEINFO arguments and results */
+       struct nfsd4_deviceid   gd_devid;       /* request */
+       u32                     gd_layout_type; /* request; selects the nfsd4_layout_ops[] entry */
+       u32                     gd_maxcount;    /* request; 0 skips encoding the device address */
+       u32                     gd_notify_types;/* request - response; first bitmap word only */
+       void                    *gd_device;     /* response; kfree()d by nfsd4_encode_getdeviceinfo() */
+};
+
+struct nfsd4_layoutget {       /* LAYOUTGET arguments and results */
+       u64                     lg_minlength;   /* request */
+       u32                     lg_signal;      /* request */
+       u32                     lg_layout_type; /* request; selects the nfsd4_layout_ops[] entry */
+       u32                     lg_maxcount;    /* request */
+       stateid_t               lg_sid;         /* request/response */
+       struct nfsd4_layout_seg lg_seg;         /* request/response */
+       void                    *lg_content;    /* response; kfree()d by nfsd4_encode_layoutget() */
+};
+
+struct nfsd4_layoutcommit {    /* LAYOUTCOMMIT arguments and results */
+       stateid_t               lc_sid;         /* request */
+       struct nfsd4_layout_seg lc_seg;         /* request; iomode is not decoded for this op */
+       u32                     lc_reclaim;     /* request */
+       u32                     lc_newoffset;   /* request; non-zero if lc_last_wr was sent */
+       u64                     lc_last_wr;     /* request; 0 when lc_newoffset is 0 */
+       struct timespec         lc_mtime;       /* request; tv_nsec == UTIME_NOW if not sent */
+       u32                     lc_layout_type; /* request */
+       u32                     lc_up_len;      /* layout length */
+       void                    *lc_up_layout;  /* decoded by callback; set only if lc_up_len > 0 */
+       u32                     lc_size_chg;    /* boolean for response */
+       u64                     lc_newsize;     /* response; encoded only if lc_size_chg */
+};
+
+struct nfsd4_layoutreturn {    /* LAYOUTRETURN arguments and results */
+       u32                     lr_return_type; /* request; RETURN_FILE carries range, stateid and body */
+       u32                     lr_layout_type; /* request */
+       struct nfsd4_layout_seg lr_seg;         /* request; whole file unless RETURN_FILE */
+       u32                     lr_reclaim;     /* request */
+       u32                     lrf_body_len;   /* request; decoded for RETURN_FILE only */
+       void                    *lrf_body;      /* request; set only if lrf_body_len > 0 */
+       stateid_t               lr_sid;         /* request/response; decoded for RETURN_FILE only */
+       u32                     lrs_present;    /* response; non-zero if lr_sid is encoded */
+};
+
 struct nfsd4_fallocate {
        /* request */
        stateid_t       falloc_stateid;
                struct nfsd4_reclaim_complete   reclaim_complete;
                struct nfsd4_test_stateid       test_stateid;
                struct nfsd4_free_stateid       free_stateid;
+               struct nfsd4_getdeviceinfo      getdeviceinfo;
+               struct nfsd4_layoutget          layoutget;
+               struct nfsd4_layoutcommit       layoutcommit;
+               struct nfsd4_layoutreturn       layoutreturn;
 
                /* NFSv4.2 */
                struct nfsd4_fallocate          allocate;
 
 #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
 #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
 #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
+#define FATTR4_WORD2_LAYOUT_TYPES       (1UL << 0)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 
 #define NFSDDBG_REPCACHE       0x0080
 #define NFSDDBG_XDR            0x0100
 #define NFSDDBG_LOCKD          0x0200
+#define NFSDDBG_PNFS           0x0400
 #define NFSDDBG_ALL            0x7FFF
 #define NFSDDBG_NOCHANGE       0xFFFF
 
 
  * exported filesystem.
  */
 #define        NFSEXP_V4ROOT           0x10000
+#define NFSEXP_NOPNFS          0x20000
+
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS                0x1FE7F
+#define NFSEXP_ALLFLAGS                0x3FE7F
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS   (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \