locking/pvqspinlock: Only kick CPU at unlock time

author Waiman Long <Waiman.Long@hp.com>

Sat, 11 Jul 2015 20:36:52 +0000 (16:36 -0400)

committer Ingo Molnar <mingo@kernel.org>

Mon, 3 Aug 2015 08:57:11 +0000 (10:57 +0200)
author Waiman Long <Waiman.Long@hp.com>
Sat, 11 Jul 2015 20:36:52 +0000 (16:36 -0400)
committer Ingo Molnar <mingo@kernel.org>
Mon, 3 Aug 2015 08:57:11 +0000 (10:57 +0200)
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c

index 38c49202d532b3b96554756b706b0fc64127c7aa..337c8818541d339aac3fd1e3e6af32dac6dff4c9 100644 (file)
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -239,8 +239,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
  
  static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
  static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_kick_node(struct qspinlock *lock,
+                                          struct mcs_spinlock *node) { }
  static __always_inline void __pv_wait_head(struct qspinlock *lock,
                                            struct mcs_spinlock *node) { }
  
@@ -440,7 +440,7 @@ queue:
                 cpu_relax();
  
         arch_mcs_spin_unlock_contended(&next->locked);
-       pv_kick_node(next);
+       pv_kick_node(lock, next);
  
  release:
         /*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h

index ab8b1bb8caa411d86cecb915329ed7be7ce4de2b..c8e6e9a596f513baa8a85af3cb0f2b9ff9116a91 100644 (file)
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -22,9 +22,14 @@
  
  #define _Q_SLOW_VAL    (3U << _Q_LOCKED_OFFSET)
  
+/*
+ * Queue node uses: vcpu_running & vcpu_halted.
+ * Queue head uses: vcpu_running & vcpu_hashed.
+ */
  enum vcpu_state {
         vcpu_running = 0,
-       vcpu_halted,
+       vcpu_halted,            /* Used only in pv_wait_node */
+       vcpu_hashed,            /* = pv_hash'ed + vcpu_halted */
  };
  
  struct pv_node {
@@ -153,7 +158,8 @@ static void pv_init_node(struct mcs_spinlock *node)
  
  /*
   * Wait for node->locked to become true, halt the vcpu after a short spin.
- * pv_kick_node() is used to wake the vcpu again.
+ * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
+ * behalf.
   */
  static void pv_wait_node(struct mcs_spinlock *node)
  {
@@ -172,9 +178,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
                  *
                  * [S] pn->state = vcpu_halted    [S] next->locked = 1
                  *     MB                             MB
-                * [L] pn->locked               [RmW] pn->state = vcpu_running
+                * [L] pn->locked               [RmW] pn->state = vcpu_hashed
                  *
-                * Matches the xchg() from pv_kick_node().
+                * Matches the cmpxchg() from pv_kick_node().
                  */
                 smp_store_mb(pn->state, vcpu_halted);
  
@@ -182,9 +188,10 @@ static void pv_wait_node(struct mcs_spinlock *node)
                         pv_wait(&pn->state, vcpu_halted);
  
                 /*
-                * Reset the vCPU state to avoid unncessary CPU kicking
+                * If pv_kick_node() changed us to vcpu_hashed, retain that value
+                * so that pv_wait_head() knows to not also try to hash this lock.
                  */
-               WRITE_ONCE(pn->state, vcpu_running);
+               cmpxchg(&pn->state, vcpu_halted, vcpu_running);
  
                 /*
                  * If the locked flag is still not set after wakeup, it is a
@@ -194,6 +201,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
                  * MCS lock will be released soon.
                  */
         }
+
         /*
          * By now our node->locked should be 1 and our caller will not actually
          * spin-wait for it. We do however rely on our caller to do a
@@ -202,24 +210,35 @@ static void pv_wait_node(struct mcs_spinlock *node)
  }
  
  /*
- * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * Called after setting next->locked = 1 when we're the lock owner.
+ *
+ * Instead of waking the waiters stuck in pv_wait_node(), advance their state so
+ * that they're waiting in pv_wait_head(); this avoids a wake/sleep cycle.
   */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
  {
         struct pv_node *pn = (struct pv_node *)node;
+       struct __qspinlock *l = (void *)lock;
  
         /*
-        * Note that because node->locked is already set, this actual
-        * mcs_spinlock entry could be re-used already.
+        * If the vCPU is indeed halted, advance its state to match that of
+        * pv_wait_node(). If OTOH this fails, the vCPU was running and will
+        * observe its next->locked value and advance itself.
          *
-        * This should be fine however, kicking people for no reason is
-        * harmless.
+        * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
+        */
+       if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted)
+               return;
+
+       /*
+        * Put the lock into the hash table and set the _Q_SLOW_VAL.
          *
-        * See the comment in pv_wait_node().
+        * As this is the same vCPU that will check the _Q_SLOW_VAL value and
+        * the hash table later on at unlock time, no atomic instruction is
+        * needed.
          */
-       if (xchg(&pn->state, vcpu_running) == vcpu_halted)
-               pv_kick(pn->cpu);
+       WRITE_ONCE(l->locked, _Q_SLOW_VAL);
+       (void)pv_hash(lock, pn);
  }
  
  /*
@@ -233,6 +252,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
         struct qspinlock **lp = NULL;
         int loop;
  
+       /*
+        * If pv_kick_node() already advanced our state, we don't need to
+        * insert ourselves into the hash table anymore.
+        */
+       if (READ_ONCE(pn->state) == vcpu_hashed)
+               lp = (struct qspinlock **)1;
+
         for (;;) {
                 for (loop = SPIN_THRESHOLD; loop; loop--) {
                         if (!READ_ONCE(l->locked))
@@ -240,9 +266,10 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
                         cpu_relax();
                 }
  
-               WRITE_ONCE(pn->state, vcpu_halted);
                 if (!lp) { /* ONCE */
+                       WRITE_ONCE(pn->state, vcpu_hashed);
                         lp = pv_hash(lock, pn);
+
                         /*
                          * We must hash before setting _Q_SLOW_VAL, such that
                          * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
@@ -333,8 +360,11 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
         /*
          * At this point the memory pointed at by lock can be freed/reused,
          * however we can still use the pv_node to kick the CPU.
+        * The other vCPU may not really be halted, but kicking an active
+        * vCPU is harmless other than the additional latency in completing
+        * the unlock.
          */
-       if (READ_ONCE(node->state) == vcpu_halted)
+       if (READ_ONCE(node->state) == vcpu_hashed)
                 pv_kick(node->cpu);
  }
  /*
author	Waiman Long <Waiman.Long@hp.com>
	Sat, 11 Jul 2015 20:36:52 +0000 (16:36 -0400)
committer	Ingo Molnar <mingo@kernel.org>
	Mon, 3 Aug 2015 08:57:11 +0000 (10:57 +0200)
kernel/locking/qspinlock.c		patch | blob | history
kernel/locking/qspinlock_paravirt.h		patch | blob | history