* atomic_long_cmpxchg() will be used to obtain writer lock.
  *
  * There are three places where the lock handoff bit may be set or cleared.
- * 1) rwsem_mark_wake() for readers.
- * 2) rwsem_try_write_lock() for writers.
- * 3) Error path of rwsem_down_write_slowpath().
+ * 1) rwsem_mark_wake() for readers            -- set, clear
+ * 2) rwsem_try_write_lock() for writers       -- set, clear
+ * 3) rwsem_del_waiter()                       -- clear
  *
  * For all the above cases, wait_lock will be held. A writer must also
  * be the first one in the wait_list to be eligible for setting the handoff
        struct task_struct *task;
        enum rwsem_waiter_type type;
        unsigned long timeout;
+
+       /* Writer only, not initialized in reader */
+       bool handoff_set;
 };
 #define rwsem_first_waiter(sem) \
        list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
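To make the eligibility rule from the handoff comment above concrete, a hypothetical helper (not part of this patch, built only from pieces already defined in this file) would read:

static inline bool rwsem_writer_may_set_handoff(struct rw_semaphore *sem,
                                                struct rwsem_waiter *waiter)
{
        /* every handoff bit update happens with wait_lock held */
        lockdep_assert_held(&sem->wait_lock);

        /* only the first waiter in wait_list is eligible */
        return rwsem_first_waiter(sem) == waiter;
}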
        RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
 };
 
-enum writer_wait_state {
-       WRITER_NOT_FIRST,       /* Writer is not first in wait list */
-       WRITER_FIRST,           /* Writer is first in wait list     */
-       WRITER_HANDOFF          /* Writer is first & handoff needed */
-};
-
 /*
  * The typical HZ value is either 250 or 1000. So set the minimum waiting
  * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
  */
 #define MAX_READERS_WAKEUP     0x100
 
+static inline void
+rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+       lockdep_assert_held(&sem->wait_lock);
+       list_add_tail(&waiter->list, &sem->wait_list);
+       /* caller will set RWSEM_FLAG_WAITERS */
+}
+
+/*
+ * Remove a waiter from the wait_list and clear flags.
+ *
+ * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
+ * this function. Modify with care.
+ */
+static inline void
+rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+       lockdep_assert_held(&sem->wait_lock);
+       list_del(&waiter->list);
+       if (likely(!list_empty(&sem->wait_list)))
+               return;
+
+       atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+}
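As a minimal user-space sketch of the invariant above (assumed stand-in bit values and a simplified list, so it compiles outside the kernel), the flags are only dropped when the last waiter leaves:

#include <stdatomic.h>

#define FLAG_WAITERS  (1UL << 1)        /* stand-in for RWSEM_FLAG_WAITERS */
#define FLAG_HANDOFF  (1UL << 2)        /* stand-in for RWSEM_FLAG_HANDOFF */

struct waiter_model { struct waiter_model *next; };

struct sem_model {
        atomic_ulong count;
        struct waiter_model *wait_list; /* singly linked for brevity */
};

/* caller is assumed to hold the (not shown) wait_lock of the model */
static void del_waiter_model(struct sem_model *s, struct waiter_model *w)
{
        struct waiter_model **pp = &s->wait_list;

        while (*pp && *pp != w)         /* unlink w from the list */
                pp = &(*pp)->next;
        if (*pp)
                *pp = w->next;

        if (s->wait_list)               /* other waiters remain: keep flags */
                return;

        /* last waiter left: clear WAITERS and HANDOFF in one atomic step */
        atomic_fetch_and(&s->count, ~(FLAG_WAITERS | FLAG_HANDOFF));
}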
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
  *   preferably when the wait_lock is released
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
+ *
+ * Implies rwsem_del_waiter() for all woken readers.
  */
 static void rwsem_mark_wake(struct rw_semaphore *sem,
                            enum rwsem_wake_type wake_type,
 
        adjustment = woken * RWSEM_READER_BIAS - adjustment;
        lockevent_cond_inc(rwsem_wake_reader, woken);
+
+       oldcount = atomic_long_read(&sem->count);
        if (list_empty(&sem->wait_list)) {
-               /* hit end of list above */
+               /*
+                * Combined with list_move_tail() above, this implies
+                * rwsem_del_waiter().
+                */
                adjustment -= RWSEM_FLAG_WAITERS;
+               if (oldcount & RWSEM_FLAG_HANDOFF)
+                       adjustment -= RWSEM_FLAG_HANDOFF;
+       } else if (woken) {
+               /*
+                * When we've woken a reader, we no longer need to force
+                * writers to give up the lock and we can clear HANDOFF.
+                */
+               if (oldcount & RWSEM_FLAG_HANDOFF)
+                       adjustment -= RWSEM_FLAG_HANDOFF;
        }
 
-       /*
-        * When we've woken a reader, we no longer need to force writers
-        * to give up the lock and we can clear HANDOFF.
-        */
-       if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
-               adjustment -= RWSEM_FLAG_HANDOFF;
-
        if (adjustment)
                atomic_long_add(adjustment, &sem->count);
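For a concrete feel of the single-add update above, assume the usual bit values (RWSEM_READER_BIAS == 1UL << 8, RWSEM_FLAG_WAITERS == 1UL << 1, RWSEM_FLAG_HANDOFF == 1UL << 2) and that the waker holds no temporary reader count of its own; waking the last two queued readers while HANDOFF is set then folds into one atomic update:

        long woken      = 2;                            /* readers being woken       */
        long adjustment = woken * RWSEM_READER_BIAS;    /* grant two reader counts   */

        adjustment -= RWSEM_FLAG_WAITERS;               /* wait_list just went empty */
        adjustment -= RWSEM_FLAG_HANDOFF;               /* oldcount had HANDOFF set  */

        /* 2 * 256 - 2 - 4 == 506, applied to sem->count in a single add */
        atomic_long_add(adjustment, &sem->count);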
 
  * race conditions between checking the rwsem wait list and setting the
  * sem->count accordingly.
  *
- * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
- * bit is set or the lock is acquired with handoff bit cleared.
+ * Implies rwsem_del_waiter() on success.
  */
 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
-                                       enum writer_wait_state wstate)
+                                       struct rwsem_waiter *waiter)
 {
+       bool first = rwsem_first_waiter(sem) == waiter;
        long count, new;
 
        lockdep_assert_held(&sem->wait_lock);
        do {
                bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
 
-               if (has_handoff && wstate == WRITER_NOT_FIRST)
-                       return false;
+               if (has_handoff) {
+                       if (!first)
+                               return false;
+
+                       /* First waiter inherits a previously set handoff bit */
+                       waiter->handoff_set = true;
+               }
 
                new = count;
 
                if (count & RWSEM_LOCK_MASK) {
-                       if (has_handoff || (wstate != WRITER_HANDOFF))
+                       if (has_handoff || (!rt_task(waiter->task) &&
+                                           !time_after(jiffies, waiter->timeout)))
                                return false;
 
                        new |= RWSEM_FLAG_HANDOFF;
         * We have either acquired the lock with handoff bit cleared or
         * set the handoff bit.
         */
-       if (new & RWSEM_FLAG_HANDOFF)
+       if (new & RWSEM_FLAG_HANDOFF) {
+               waiter->handoff_set = true;
+               lockevent_inc(rwsem_wlock_handoff);
                return false;
+       }
 
+       /*
+        * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
+        * success.
+        */
+       list_del(&waiter->list);
        rwsem_set_owner(sem);
        return true;
 }
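Read as a predicate, the lock-is-held branch above only sets the handoff bit once the waiter has earned it; a hypothetical helper (not in the patch) expressing the same condition:

static bool rwsem_should_set_handoff(struct rwsem_waiter *waiter,
                                     bool has_handoff)
{
        if (has_handoff)
                return false;   /* bit already set, keep waiting */

        /* RT writers qualify immediately, others once their timeout expires */
        return rt_task(waiter->task) || time_after(jiffies, waiter->timeout);
}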
                }
                adjustment += RWSEM_FLAG_WAITERS;
        }
-       list_add_tail(&waiter.list, &sem->wait_list);
+       rwsem_add_waiter(sem, &waiter);
 
        /* we're now waiting on the lock, but no longer actively locking */
        count = atomic_long_add_return(adjustment, &sem->count);
        return sem;
 
 out_nolock:
-       list_del(&waiter.list);
-       if (list_empty(&sem->wait_list)) {
-               atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
-                                  &sem->count);
-       }
+       rwsem_del_waiter(sem, &waiter);
        raw_spin_unlock_irq(&sem->wait_lock);
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock_fail);
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
        long count;
-       enum writer_wait_state wstate;
        struct rwsem_waiter waiter;
-       struct rw_semaphore *ret = sem;
        DEFINE_WAKE_Q(wake_q);
 
        /* do optimistic spinning and steal lock if possible */
        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_WRITE;
        waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+       waiter.handoff_set = false;
 
        raw_spin_lock_irq(&sem->wait_lock);
-
-       /* account for this before adding a new element to the list */
-       wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
-
-       list_add_tail(&waiter.list, &sem->wait_list);
+       rwsem_add_waiter(sem, &waiter);
 
        /* we're now waiting on the lock */
-       if (wstate == WRITER_NOT_FIRST) {
+       if (rwsem_first_waiter(sem) != &waiter) {
                count = atomic_long_read(&sem->count);
 
                /*
        /* wait until we successfully acquire the lock */
        set_current_state(state);
        for (;;) {
-               if (rwsem_try_write_lock(sem, wstate)) {
+               if (rwsem_try_write_lock(sem, &waiter)) {
                        /* rwsem_try_write_lock() implies ACQUIRE on success */
                        break;
                }
 
                raw_spin_unlock_irq(&sem->wait_lock);
 
+               if (signal_pending_state(state, current))
+                       goto out_nolock;
+
                /*
                 * After setting the handoff bit and failing to acquire
                 * the lock, attempt to spin on owner to accelerate lock
                 * In this case, we attempt to acquire the lock again
                 * without sleeping.
                 */
-               if (wstate == WRITER_HANDOFF) {
+               if (waiter.handoff_set) {
                        enum owner_state owner_state;
 
                        preempt_disable();
                                goto trylock_again;
                }
 
-               /* Block until there are no active lockers. */
-               for (;;) {
-                       if (signal_pending_state(state, current))
-                               goto out_nolock;
-
-                       schedule();
-                       lockevent_inc(rwsem_sleep_writer);
-                       set_current_state(state);
-                       /*
-                        * If HANDOFF bit is set, unconditionally do
-                        * a trylock.
-                        */
-                       if (wstate == WRITER_HANDOFF)
-                               break;
-
-                       if ((wstate == WRITER_NOT_FIRST) &&
-                           (rwsem_first_waiter(sem) == &waiter))
-                               wstate = WRITER_FIRST;
-
-                       count = atomic_long_read(&sem->count);
-                       if (!(count & RWSEM_LOCK_MASK))
-                               break;
-
-                       /*
-                        * The setting of the handoff bit is deferred
-                        * until rwsem_try_write_lock() is called.
-                        */
-                       if ((wstate == WRITER_FIRST) && (rt_task(current) ||
-                           time_after(jiffies, waiter.timeout))) {
-                               wstate = WRITER_HANDOFF;
-                               lockevent_inc(rwsem_wlock_handoff);
-                               break;
-                       }
-               }
+               schedule();
+               lockevent_inc(rwsem_sleep_writer);
+               set_current_state(state);
 trylock_again:
                raw_spin_lock_irq(&sem->wait_lock);
        }
        __set_current_state(TASK_RUNNING);
-       list_del(&waiter.list);
        raw_spin_unlock_irq(&sem->wait_lock);
        lockevent_inc(rwsem_wlock);
-
-       return ret;
+       return sem;
 
 out_nolock:
        __set_current_state(TASK_RUNNING);
        raw_spin_lock_irq(&sem->wait_lock);
-       list_del(&waiter.list);
-
-       if (unlikely(wstate == WRITER_HANDOFF))
-               atomic_long_add(-RWSEM_FLAG_HANDOFF,  &sem->count);
-
-       if (list_empty(&sem->wait_list))
-               atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
-       else
+       rwsem_del_waiter(sem, &waiter);
+       if (!list_empty(&sem->wait_list))
                rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
        raw_spin_unlock_irq(&sem->wait_lock);
        wake_up_q(&wake_q);
        lockevent_inc(rwsem_wlock_fail);
-
        return ERR_PTR(-EINTR);
 }
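Putting the hunks above back together, the writer wait loop is now flat; a condensed restatement, with the owner spinning folded into a hypothetical spin_on_owner_succeeded() helper and other context elided:

        for (;;) {
                if (rwsem_try_write_lock(sem, &waiter))
                        break;                  /* acquired; waiter already removed */

                raw_spin_unlock_irq(&sem->wait_lock);

                if (signal_pending_state(state, current))
                        goto out_nolock;        /* bail out before sleeping */

                if (waiter.handoff_set && spin_on_owner_succeeded(sem))
                        goto trylock_again;     /* retry at once, no sleep */

                schedule();
                lockevent_inc(rwsem_sleep_writer);
                set_current_state(state);
trylock_again:
                raw_spin_lock_irq(&sem->wait_lock);
        }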