/* Data structures. */
 
-static struct lock_class_key rcu_root_class;
+static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
 
 #define RCU_STATE_INITIALIZER(name) { \
        .level = { &name.node[0] }, \
 {
        unsigned long flags;
        unsigned long mask;
+       int need_quiet = 0;
        struct rcu_data *rdp = rsp->rda[cpu];
        struct rcu_node *rnp;
 
                spin_lock(&rnp->lock);          /* irqs already disabled. */
                rnp->qsmaskinit &= ~mask;
                if (rnp->qsmaskinit != 0) {
-                       spin_unlock(&rnp->lock); /* irqs remain disabled. */
+                       if (rnp != rdp->mynode)
+                               spin_unlock(&rnp->lock); /* irqs remain disabled. */
                        break;
                }
-
-               /*
-                * If there was a task blocking the current grace period,
-                * and if all CPUs have checked in, we need to propagate
-                * the quiescent state up the rcu_node hierarchy.  But that
-                * is inconvenient at the moment due to deadlock issues if
-                * this should end the current grace period.  So set the
-                * offlined CPU's bit in ->qsmask in order to force the
-                * next force_quiescent_state() invocation to clean up this
-                * mess in a deadlock-free manner.
-                */
-               if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask)
-                       rnp->qsmask |= mask;
-
+               if (rnp == rdp->mynode)
+                       need_quiet = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+               else
+                       spin_unlock(&rnp->lock); /* irqs remain disabled. */
                mask = rnp->grpmask;
-               spin_unlock(&rnp->lock);        /* irqs remain disabled. */
                rnp = rnp->parent;
        } while (rnp != NULL);
 
-       spin_unlock_irqrestore(&rsp->onofflock, flags);
+       /*
+        * We still hold the leaf rcu_node structure lock here, and
+        * irqs are still disabled.  The reason for this subterfuge is
+        * that invoking task_quiet() with ->onofflock held leads
+        * to deadlock.
+        */
+       spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+       rnp = rdp->mynode;
+       if (need_quiet)
+               task_quiet(rnp, flags);
+       else
+               spin_unlock_irqrestore(&rnp->lock, flags);
 
        rcu_adopt_orphan_cbs(rsp);
 }
                rnp = rsp->level[i];
                for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
                        spin_lock_init(&rnp->lock);
+                       lockdep_set_class(&rnp->lock, &rcu_node_class[i]);
                        rnp->gpnum = 0;
                        rnp->qsmask = 0;
                        rnp->qsmaskinit = 0;
                        INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
                }
        }
-       lockdep_set_class(&rcu_get_root(rsp)->lock, &rcu_root_class);
 }
 
 /*
 
        return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
 }
 
+/*
+ * Record a quiescent state for all tasks that were previously queued
+ * on the specified rcu_node structure and that were blocking the current
+ * RCU grace period.  The caller must hold the specified rnp->lock with
+ * irqs disabled and must pass in the flags it saved when disabling them.
+ * The lock is released before return, and flags is used to restore the
+ * caller's irq state: directly via spin_unlock_irqrestore() when more
+ * quiescent states are still needed, otherwise by handing flags on to
+ * cpu_quiet_msk_finish() or cpu_quiet_msk().
+ */
+static void task_quiet(struct rcu_node *rnp, unsigned long flags)
+       __releases(rnp->lock)
+{
+       unsigned long mask;
+       struct rcu_node *rnp_p;
+
+       if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+               spin_unlock_irqrestore(&rnp->lock, flags);
+               return;  /* Still need more quiescent states! */
+       }
+
+       rnp_p = rnp->parent;
+       if (rnp_p == NULL) {
+               /*
+                * No parent, so this is the root: either there is only
+                * one rcu_node in the tree, or tasks were kicked up to
+                * the root rcu_node due to CPUs going offline.
+                */
+               cpu_quiet_msk_finish(&rcu_preempt_state, flags);
+               return;
+       }
+
+       /*
+        * Report up the rest of the hierarchy.  ->grpmask is read while
+        * rnp->lock is still held, and that lock is dropped before the
+        * parent's is taken, so the two are never held together here.
+        */
+       mask = rnp->grpmask;
+       spin_unlock(&rnp->lock);        /* irqs remain disabled. */
+       spin_lock(&rnp_p->lock);        /* irqs already disabled. */
+       cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or the task having blocked during the RCU
+ * read-side critical section.
+ */
 static void rcu_read_unlock_special(struct task_struct *t)
 {
        int empty;
        unsigned long flags;
-       unsigned long mask;
        struct rcu_node *rnp;
        int special;
 
                /*
                 * If this was the last task on the current list, and if
                 * we aren't waiting on any CPUs, report the quiescent state.
-                * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
-                * drop rnp->lock and restore irq.
+                * Note that task_quiet() releases rnp->lock and restores irqs.
                 */
-               if (!empty && rnp->qsmask == 0 &&
-                   !rcu_preempted_readers(rnp)) {
-                       struct rcu_node *rnp_p;
-
-                       if (rnp->parent == NULL) {
-                               /* Only one rcu_node in the tree. */
-                               cpu_quiet_msk_finish(&rcu_preempt_state, flags);
-                               return;
-                       }
-                       /* Report up the rest of the hierarchy. */
-                       mask = rnp->grpmask;
+               if (empty)
                        spin_unlock_irqrestore(&rnp->lock, flags);
-                       rnp_p = rnp->parent;
-                       spin_lock_irqsave(&rnp_p->lock, flags);
-                       WARN_ON_ONCE(rnp->qsmask);
-                       cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
-                       return;
-               }
-               spin_unlock(&rnp->lock);
+               else
+                       task_quiet(rnp, flags);
+       } else {
+               local_irq_restore(flags);
        }
-       local_irq_restore(flags);
 }
 
 /*
  * rcu_node.  The reason for not just moving them to the immediate
  * parent is to remove the need for rcu_read_unlock_special() to
  * make more than two attempts to acquire the target rcu_node's lock.
+ * Returns true if there were tasks blocking the current RCU grace
+ * period.
  *
  * Returns 1 if there was previously a task blocking the current grace
  * period on the specified rcu_node structure.
        int i;
        struct list_head *lp;
        struct list_head *lp_root;
-       int retval = rcu_preempted_readers(rnp);
+       int retval;
        struct rcu_node *rnp_root = rcu_get_root(rsp);
        struct task_struct *tp;
 
         * rcu_nodes in terms of gp_num value.  This fact allows us to
         * move the blocked_tasks[] array directly, element by element.
         */
+       retval = rcu_preempted_readers(rnp);
        for (i = 0; i < 2; i++) {
                lp = &rnp->blocked_tasks[i];
                lp_root = &rnp_root->blocked_tasks[i];
                        spin_unlock(&rnp_root->lock); /* irqs remain disabled */
                }
        }
-
        return retval;
 }
 
        return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptible RCU does not exist, there are no tasks to quiet:
+ * simply release rnp->lock, which the caller must hold, and restore the
+ * irq state saved in flags.
+ */
+static void task_quiet(struct rcu_node *rnp, unsigned long flags)
+{
+       spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 
 /*