nfsd: don't hold i_mutex over userspace upcalls

author NeilBrown <neilb@suse.de>

Thu, 7 Jan 2016 21:08:20 +0000 (16:08 -0500)

committer Shan Hai <shan.hai@oracle.com>

Fri, 4 Aug 2017 05:43:35 +0000 (13:43 +0800)
author NeilBrown <neilb@suse.de>
Thu, 7 Jan 2016 21:08:20 +0000 (16:08 -0500)
committer Shan Hai <shan.hai@oracle.com>
Fri, 4 Aug 2017 05:43:35 +0000 (13:43 +0800)
diff --git a/fs/namei.c b/fs/namei.c

index 701871ace9be34fa45272541bf9d9dde18c74b7e..9782a111104155cebcb1443ea183b820dbfbbfa6 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2176,6 +2176,8 @@ static struct dentry *lookup_hash(struct nameidata *nd)
   *
   * Note that this routine is purely a helper for filesystem usage and should
   * not be called by generic code.
+ *
+ * The caller must hold base->i_mutex.
   */
  struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
  {
@@ -2219,6 +2221,75 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
  }
  EXPORT_SYMBOL(lookup_one_len);
  
+/**
+ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
+ * @name:      pathname component to lookup
+ * @base:      base directory to lookup from
+ * @len:       maximum length @len should be interpreted to
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * Unlike lookup_one_len, it should be called without the parent
+ * i_mutex held, and will take the i_mutex itself if necessary.
+ */
+struct dentry *lookup_one_len_unlocked(const char *name,
+                                      struct dentry *base, int len)
+{
+       struct qstr this;
+       unsigned int c;
+       int err;
+       struct dentry *ret;
+
+       this.name = name;
+       this.len = len;
+       this.hash = full_name_hash(name, len);
+       if (!len)
+               return ERR_PTR(-EACCES);
+
+       if (unlikely(name[0] == '.')) {
+               if (len < 2 || (len == 2 && name[1] == '.'))
+                       return ERR_PTR(-EACCES);
+       }
+
+       while (len--) {
+               c = *(const unsigned char *)name++;
+               if (c == '/' || c == '\0')
+                       return ERR_PTR(-EACCES);
+       }
+       /*
+        * See if the low-level filesystem might want
+        * to use its own hash..
+        */
+       if (base->d_flags & DCACHE_OP_HASH) {
+               int err = base->d_op->d_hash(base, &this);
+               if (err < 0)
+                       return ERR_PTR(err);
+       }
+
+       err = inode_permission(base->d_inode, MAY_EXEC);
+       if (err)
+               return ERR_PTR(err);
+
+       /*
+        * __d_lookup() is used to try to get a quick answer and avoid the
+        * mutex.  A false-negative does no harm.
+        */
+       ret = __d_lookup(base, &this);
+       if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) {
+               dput(ret);
+               ret = NULL;
+       }
+       if (ret)
+               return ret;
+
+       mutex_lock(&base->d_inode->i_mutex);
+       ret =  __lookup_hash(&this, base, 0);
+       mutex_unlock(&base->d_inode->i_mutex);
+       return ret;
+}
+EXPORT_SYMBOL(lookup_one_len_unlocked);
+
  int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
                  struct path *path, int *empty)
  {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c

index 769472c0688a86660aba923a90fa01e5c173164d..68165b2283a08aa8b098d67be83cfb16914e82c5 100644 (file)
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -828,7 +828,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
                 } else
                         dchild = dget(dparent);
         } else
-               dchild = lookup_one_len(name, dparent, namlen);
+               dchild = lookup_one_len_unlocked(name, dparent, namlen);
         if (IS_ERR(dchild))
                 return rv;
         if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c

index d8297542f8b3e1d67436d2e20e50930f20d2949b..a9454dcfdd356566c42bf4ed5a7c0b0decfce6ca 100644 (file)
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2825,14 +2825,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
         __be32 nfserr;
         int ignore_crossmnt = 0;
  
-       dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+       dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
         if (IS_ERR(dentry))
                 return nfserrno(PTR_ERR(dentry));
         if (d_really_is_negative(dentry)) {
                 /*
-                * nfsd_buffered_readdir drops the i_mutex between
-                * readdir and calling this callback, leaving a window
-                * where this directory entry could have gone away.
+                * we're not holding the i_mutex here, so there's
+                * a window where this directory entry could have gone
+                * away.
                  */
                 dput(dentry);
                 return nfserr_noent;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index 84d770be056ee7b1a4db58e3ca0acb648b91a18d..e87a26f6549958c7c79933c53e2c337ccb263d9b 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -217,10 +217,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 host_err = PTR_ERR(dentry);
                 if (IS_ERR(dentry))
                         goto out_nfserr;
-               /*
-                * check if we have crossed a mount point ...
-                */
                 if (nfsd_mountpoint(dentry, exp)) {
+                       /*
+                        * We don't need the i_mutex after all.  It's
+                        * still possible we could open this (regular
+                        * files can be mountpoints too), but the
+                        * i_mutex is just there to prevent renames of
+                        * something that we might be about to delegate,
+                        * and a mountpoint won't be renamed:
+                        */
+                       fh_unlock(fhp);
                         if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
                                 dput(dentry);
                                 goto out_nfserr;
@@ -1885,7 +1891,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
         offset = *offsetp;
  
         while (1) {
-               struct inode *dir_inode = file_inode(file);
                 unsigned int reclen;
  
                 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1904,15 +1909,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
                 if (!size)
                         break;
  
-               /*
-                * Various filldir functions may end up calling back into
-                * lookup_one_len() and the file system's ->lookup() method.
-                * These expect i_mutex to be held, as it would within readdir.
-                */
-               host_err = mutex_lock_killable(&dir_inode->i_mutex);
-               if (host_err)
-                       break;
-
                 de = (struct buffered_dirent *)buf.dirent;
                 while (size > 0) {
                         offset = de->offset;
@@ -1929,7 +1925,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
                         size -= reclen;
                         de = (struct buffered_dirent *)((char *)de + reclen);
                 }
-               mutex_unlock(&dir_inode->i_mutex);
                 if (size > 0) /* We bailed out early */
                         break;
  
diff --git a/include/linux/namei.h b/include/linux/namei.h

index c8990779f0c33b99e552ca9406621cde03f49443..bb3a2f7cca67c0bcaba891ed89cdaaa95e162d37 100644 (file)
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -62,6 +62,7 @@ extern struct dentry *kern_path_locked(const char *, struct path *);
  extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
  
  extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
+extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
  
  extern int follow_down_one(struct path *);
  extern int follow_down(struct path *);
author	NeilBrown <neilb@suse.de>
	Thu, 7 Jan 2016 21:08:20 +0000 (16:08 -0500)
committer	Shan Hai <shan.hai@oracle.com>
	Fri, 4 Aug 2017 05:43:35 +0000 (13:43 +0800)
fs/namei.c		patch \| blob \| history
fs/nfsd/nfs3xdr.c		patch \| blob \| history
fs/nfsd/nfs4xdr.c		patch \| blob \| history
fs/nfsd/vfs.c		patch \| blob \| history
include/linux/namei.h		patch \| blob \| history