From 0d98439ea3c6ffb2af931f6de4480e744634e2c5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Sep 2013 13:46:52 -0700 Subject: [PATCH] vfs: use lockred "dead" flag to mark unrecoverably dead dentries This simplifies the RCU to refcounting code in particular. I was originally intending to leave this for later, but walking through all the dput() logic (see previous commit), I realized that the dput() "might_sleep()" check was misleadingly weak. And I removed it as misleading, both for performance profiling and for debugging. However, the might_sleep() debugging case is actually true: the final dput() can indeed sleep, if the inode of the dentry that you are releasing ends up sleeping at iput time (see dentry_iput()). So the problem with the might_sleep() in dput() wasn't that it wasn't true, it was that it wasn't actually testing and triggering on the interesting case. In particular, just about *any* dput() can indeed sleep, if you happen to race with another thread deleting the file in question, and you then lose the race to the be the last dput() for that file. But because it's a very rare race, the debugging code would never trigger it in practice. Why is this problematic? The new d_rcu_to_refcount() (see commit 15570086b590: "vfs: reimplement d_rcu_to_refcount() using lockref_get_or_lock()") does a dput() for the failure case, and it does it under the RCU lock. So potentially sleeping really is a bug. But there's no way I'm going to fix this with the previous complicated "lockref_get_or_lock()" interface. And rather than revert to the old and crufty nested dentry locking code (which did get this right by delaying the reference count updates until they were verified to be safe), let's make forward progress. Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/dcache.c | 17 ++++++++++------- fs/namei.c | 23 +++++------------------ 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index bf3c4f9569eb9..ca8e9cd60f87e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -229,7 +229,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { - BUG_ON(dentry->d_lockref.count); + BUG_ON((int)dentry->d_lockref.count > 0); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -445,7 +445,7 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) +static inline struct dentry *dentry_kill(struct dentry *dentry) __releases(dentry->d_lock) { struct inode *inode; @@ -468,8 +468,11 @@ relock: goto relock; } - if (ref) - dentry->d_lockref.count--; + /* + * The dentry is now unrecoverably dead to the world. + */ + lockref_mark_dead(&dentry->d_lockref); + /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. @@ -535,7 +538,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry); if (dentry) goto repeat; } @@ -760,7 +763,7 @@ static void try_prune_one_dentry(struct dentry *dentry) { struct dentry *parent; - parent = dentry_kill(dentry, 0); + parent = dentry_kill(dentry); /* * If dentry_kill returns NULL, we have nothing more to do. * if it returns the same dentry, trylocks failed. In either @@ -781,7 +784,7 @@ static void try_prune_one_dentry(struct dentry *dentry) while (dentry) { if (lockref_put_or_lock(&dentry->d_lockref)) return; - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry); } } diff --git a/fs/namei.c b/fs/namei.c index f415c6683a837..cc4bcfaa86241 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -517,25 +517,12 @@ static inline void unlock_rcu_walk(void) */ static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq) { - int gotref; - - gotref = lockref_get_or_lock(&dentry->d_lockref); - - /* Does the sequence number still match? */ - if (read_seqcount_retry(validate, seq)) { - if (gotref) - dput(dentry); - else - spin_unlock(&dentry->d_lock); - return -ECHILD; - } - - /* Get the ref now, if we couldn't get it originally */ - if (!gotref) { - dentry->d_lockref.count++; - spin_unlock(&dentry->d_lock); + if (likely(lockref_get_not_dead(&dentry->d_lockref))) { + if (!read_seqcount_retry(validate, seq)) + return 0; + dput(dentry); } - return 0; + return -ECHILD; } /** -- 2.51.0