{
        unsigned int val;
 
-       smp_mb();
-
        __asm__ __volatile__(
        "1:     llock   %[val], [%[slock]]      \n"
        "       breq    %[val], %[LOCKED], 1b   \n"     /* spin while LOCKED */
          [LOCKED]      "r"     (__ARCH_SPIN_LOCK_LOCKED__)
        : "memory", "cc");
 
+       /*
+        * ACQUIRE barrier to ensure loads/stores after taking the lock
+        * don't "bleed up" out of the critical section (leak-in is allowed).
+        * http://www.spinics.net/lists/kernel/msg2010409.html
+        *
+        * ARCv2 only has load-load, store-store and all-all barriers,
+        * thus the full all-all barrier is needed here.
+        */
        smp_mb();
 }
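
For readers less used to the barrier placement the new comment describes, here is a minimal userspace sketch of the same ACQUIRE pattern, written with C11 atomics rather than ARC inline asm; the toy_* names and the use of <stdatomic.h> are illustrative assumptions, not the kernel's implementation.

#include <stdatomic.h>

/* 0 == unlocked, 1 == locked, mirroring the spirit of lock->slock */
typedef struct { atomic_uint slock; } toy_spinlock_t;

static inline void toy_spin_lock(toy_spinlock_t *l)
{
        /* spin while LOCKED: keep exchanging until the old value was 0 */
        while (atomic_exchange_explicit(&l->slock, 1, memory_order_relaxed))
                ;

        /*
         * Full fence standing in for smp_mb(): loads/stores that follow
         * must not "bleed up" above the lock acquisition.  A C11 acquire
         * fence would already give ACQUIRE semantics; seq_cst is used here
         * to mirror ARC's all-all barrier.
         */
        atomic_thread_fence(memory_order_seq_cst);
}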
 
 {
        unsigned int val, got_it = 0;
 
-       smp_mb();
-
        __asm__ __volatile__(
        "1:     llock   %[val], [%[slock]]      \n"
        "       breq    %[val], %[LOCKED], 4f   \n"     /* already LOCKED, just bail */
 {
        smp_mb();
 
-       lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
-
-       smp_mb();
+       WRITE_ONCE(lock->slock, __ARCH_SPIN_LOCK_UNLOCKED__);
 }
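
The unlock side is the mirror image: a full barrier before a plain store, which is what the smp_mb() + WRITE_ONCE() pair above expresses. Continuing the same hedged C11 sketch:

static inline void toy_spin_unlock(toy_spinlock_t *l)
{
        /*
         * RELEASE: full fence before the store so nothing from the critical
         * section can leak down past the point where the lock is dropped.
         */
        atomic_thread_fence(memory_order_seq_cst);
        atomic_store_explicit(&l->slock, 0, memory_order_relaxed);
}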
 
 /*
 {
        unsigned int val;
 
-       smp_mb();
-
        /*
         * zero means writer holds the lock exclusively, deny Reader.
         * Otherwise grant lock to first/subseq reader
 {
        unsigned int val, got_it = 0;
 
-       smp_mb();
-
        __asm__ __volatile__(
        "1:     llock   %[val], [%[rwlock]]     \n"
        "       brls    %[val], %[WR_LOCKED], 4f\n"     /* <= 0: already write locked, bail */
 {
        unsigned int val;
 
-       smp_mb();
-
        /*
         * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
         * deny writer. Otherwise if unlocked grant to writer
 {
        unsigned int val, got_it = 0;
 
-       smp_mb();
-
        __asm__ __volatile__(
        "1:     llock   %[val], [%[rwlock]]     \n"
        "       brne    %[val], %[UNLOCKED], 4f \n"     /* !UNLOCKED, bail */
        : [val]         "=&r"   (val)
        : [rwlock]      "r"     (&(rw->counter))
        : "memory", "cc");
-
-       smp_mb();
 }
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
        smp_mb();
 
-       rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
-
-       smp_mb();
+       WRITE_ONCE(rw->counter, __ARCH_RW_LOCK_UNLOCKED__);
 }
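
The rwlock comments above describe a counter convention: the counter starts at __ARCH_RW_LOCK_UNLOCKED__, each reader takes one slot by decrementing it, and a writer drives it to zero for exclusive ownership. The sketch below restates that convention with C11 atomics; TOY_RW_UNLOCKED and the toy_* names are assumptions for illustration, not the ARC values or code.

#define TOY_RW_UNLOCKED 0x01000000      /* stands in for __ARCH_RW_LOCK_UNLOCKED__ */

typedef struct { atomic_int counter; } toy_rwlock_t;

static int toy_read_trylock(toy_rwlock_t *rw)
{
        int val = atomic_load_explicit(&rw->counter, memory_order_relaxed);

        /*
         * counter <= 0 means a writer holds the lock: deny the reader.
         * Otherwise take one reader slot by decrementing the counter.
         */
        while (val > 0) {
                if (atomic_compare_exchange_weak_explicit(&rw->counter, &val,
                                val - 1, memory_order_relaxed,
                                memory_order_relaxed)) {
                        atomic_thread_fence(memory_order_seq_cst); /* ACQUIRE */
                        return 1;
                }
        }
        return 0;
}

static int toy_write_trylock(toy_rwlock_t *rw)
{
        int val = TOY_RW_UNLOCKED;

        /*
         * A writer only succeeds while the counter still holds the full
         * UNLOCKED value (no readers, no writer), and then drives it to 0,
         * i.e. exclusive ownership.
         */
        if (atomic_compare_exchange_strong_explicit(&rw->counter, &val, 0,
                        memory_order_relaxed, memory_order_relaxed)) {
                atomic_thread_fence(memory_order_seq_cst); /* ACQUIRE */
                return 1;
        }
        return 0;
}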
 
 #else  /* !CONFIG_ARC_HAS_LLSC */
        unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
 
        /*
-        * This smp_mb() is technically superfluous, we only need the one
-        * after the lock for providing the ACQUIRE semantics.
-        * However doing the "right" thing was regressing hackbench
-        * so keeping this, pending further investigation
+        * Per the LKMM, smp_mb() is only required after _lock (and before
+        * _unlock) for ACQUIRE and RELEASE semantics respectively. However,
+        * EX based spinlocks need the extra smp_mb to work around a hardware
+        * quirk.
         */
        smp_mb();
 
 #endif
        : "memory");
 
-       /*
-        * ACQUIRE barrier to ensure load/store after taking the lock
-        * don't "bleed-up" out of the critical section (leak-in is allowed)
-        * http://www.spinics.net/lists/kernel/msg2010409.html
-        *
-        * ARCv2 only has load-load, store-store and all-all barrier
-        * thus need the full all-all barrier
-        */
        smp_mb();
 }
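
The comment kept in this !LLSC path says the leading smp_mb() is not required by the LKMM but is retained for EX based locks as a hardware quirk workaround. In the same hedged C11 sketch (reusing the hypothetical toy_spinlock_t from above), the resulting barrier placement looks like this:

static inline void toy_ex_spin_lock(toy_spinlock_t *l)
{
        /* the "extra" barrier before taking the lock, kept only for the quirk */
        atomic_thread_fence(memory_order_seq_cst);

        /* spin while LOCKED */
        while (atomic_exchange_explicit(&l->slock, 1, memory_order_relaxed))
                ;

        /* ACQUIRE barrier after the lock is taken, as in the LLSC variant */
        atomic_thread_fence(memory_order_seq_cst);
}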
 
        : "memory");
 
        /*
-        * superfluous, but keeping for now - see pairing version in
-        * arch_spin_lock above
+        * See the pairing version/comment in arch_spin_lock above.
         */
        smp_mb();
 }
        arch_spin_unlock(&(rw->lock_mutex));
        local_irq_restore(flags);
 
-       smp_mb();
        return ret;
 }
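
In this !LLSC fallback the rwlock counter is a plain integer guarded by a spinlock (plus IRQ disabling), so the spinlock's own ACQUIRE/RELEASE ordering already brackets the update and the trailing smp_mb() adds nothing, which is why it is dropped. A rough userspace shape of that fallback, reusing the hypothetical toy_spin_lock()/toy_spin_unlock() from the earlier sketches (IRQ disabling has no userspace analogue and is omitted):

typedef struct {
        toy_spinlock_t  lock_mutex;     /* plays the role of rw->lock_mutex */
        int             counter;        /* plain int, protected by lock_mutex */
} toy_rwlock_fb_t;

static int toy_fb_read_trylock(toy_rwlock_fb_t *rw)
{
        int ret = 0;

        toy_spin_lock(&rw->lock_mutex);         /* ACQUIRE */
        if (rw->counter > 0) {                  /* no writer: take a reader slot */
                rw->counter--;
                ret = 1;
        }
        toy_spin_unlock(&rw->lock_mutex);       /* RELEASE, no extra smp_mb() */

        return ret;
}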