* hold it, acquire the root rcu_node structure's lock in order to
         * start one (if needed).
         */
-       if (rnp != rnp_root)
+       if (rnp != rnp_root) {
                raw_spin_lock(&rnp_root->lock);
+               smp_mb__after_unlock_lock();
+       }
 
        /*
         * Get a new grace-period number.  If there really is no grace
                local_irq_restore(flags);
                return;
        }
+       smp_mb__after_unlock_lock();
        __note_gp_changes(rsp, rnp, rdp);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
        rcu_bind_gp_kthread();
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        if (rsp->gp_flags == 0) {
                /* Spurious wakeup, tell caller to go back to sleep.  */
                raw_spin_unlock_irq(&rnp->lock);
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                rdp = this_cpu_ptr(rsp->rda);
                rcu_preempt_check_blocked_tasks(rnp);
                rnp->qsmask = rnp->qsmaskinit;
        /* Clear flag to prevent immediate re-entry. */
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
                raw_spin_unlock_irq(&rnp->lock);
        }
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        gp_duration = jiffies - rsp->gp_start;
        if (gp_duration > rsp->gp_max)
                rsp->gp_max = gp_duration;
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
+               smp_mb__after_unlock_lock();
                ACCESS_ONCE(rnp->completed) = rsp->gpnum;
                rdp = this_cpu_ptr(rsp->rda);
                if (rnp == rdp->mynode)
        }
        rnp = rcu_get_root(rsp);
        raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
        rcu_nocb_gp_set(rnp, nocb);
 
        rsp->completed = rsp->gpnum; /* Declare grace period done. */
                rnp_c = rnp;
                rnp = rnp->parent;
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                WARN_ON_ONCE(rnp_c->qsmask);
        }
 
 
        rnp = rdp->mynode;
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
        if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
            rnp->completed == rnp->gpnum) {
 
        mask = rdp->grpmask;    /* rnp->grplo is constant. */
        do {
                raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
+               smp_mb__after_unlock_lock();
                rnp->qsmaskinit &= ~mask;
                if (rnp->qsmaskinit != 0) {
                        if (rnp != rdp->mynode)
                cond_resched();
                mask = 0;
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                if (!rcu_gp_in_progress(rsp)) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                        return;
        rnp = rcu_get_root(rsp);
        if (rnp->qsmask == 0) {
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
        }
 }
 
        /* Reached the root of the rcu_node tree, acquire lock. */
        raw_spin_lock_irqsave(&rnp_old->lock, flags);
+       smp_mb__after_unlock_lock();
        raw_spin_unlock(&rnp_old->fqslock);
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                rsp->n_force_qs_lh++;
                        struct rcu_node *rnp_root = rcu_get_root(rsp);
 
                        raw_spin_lock(&rnp_root->lock);
+                       smp_mb__after_unlock_lock();
                        rcu_start_gp(rsp);
                        raw_spin_unlock(&rnp_root->lock);
                } else {
 
                rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
                rnp = rdp->mynode;
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
                t->rcu_blocked_node = rnp;
 
        mask = rnp->grpmask;
        raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
        raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
+       smp_mb__after_unlock_lock();
        rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
 }
 
                for (;;) {
                        rnp = t->rcu_blocked_node;
                        raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
+                       smp_mb__after_unlock_lock();
                        if (rnp == t->rcu_blocked_node)
                                break;
                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
        while (!list_empty(lp)) {
                t = list_entry(lp->next, typeof(*t), rcu_node_entry);
                raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+               smp_mb__after_unlock_lock();
                list_del(&t->rcu_node_entry);
                t->rcu_blocked_node = rnp_root;
                list_add(&t->rcu_node_entry, lp_root);
         * in this case.
         */
        raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+       smp_mb__after_unlock_lock();
        if (rnp_root->boost_tasks != NULL &&
            rnp_root->boost_tasks != rnp_root->gp_tasks &&
            rnp_root->boost_tasks != rnp_root->exp_tasks)
        unsigned long mask;
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
        for (;;) {
                if (!sync_rcu_preempt_exp_done(rnp)) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
                rnp = rnp->parent;
                raw_spin_lock(&rnp->lock); /* irqs already disabled */
+               smp_mb__after_unlock_lock();
                rnp->expmask &= ~mask;
        }
 }
        int must_wait = 0;
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
        if (list_empty(&rnp->blkd_tasks)) {
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
        } else {
        /* Initialize ->expmask for all non-leaf rcu_node structures. */
        rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irqsave(&rnp->lock, flags);
+               smp_mb__after_unlock_lock();
                rnp->expmask = rnp->qsmaskinit;
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
        }
                return 0;  /* Nothing left to boost. */
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
 
        /*
         * Recheck under the lock: all tasks in need of boosting
        if (IS_ERR(t))
                return PTR_ERR(t);
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
        rnp->boost_kthread_task = t;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
        sp.sched_priority = RCU_BOOST_PRIO;
                        continue;
                rnp = rdp->mynode;
                raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+               smp_mb__after_unlock_lock();
                rcu_accelerate_cbs(rsp, rnp, rdp);
                raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
        }
        struct rcu_node *rnp = rdp->mynode;
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
+       smp_mb__after_unlock_lock();
        c = rcu_start_future_gp(rnp, rdp);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);