Make use of arch_mutex_cpu_relax() so architectures can override the
default cpu_relax() semantics.
This is especially useful for s390, where cpu_relax() means that we
yield() the current (virtual) cpu and therefore is very expensive,
and would contradict the whole purpose of the lockless cmpxchg loop.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
 # define cmpxchg64_relaxed cmpxchg64
 #endif
 
+/*
+ * Allow architectures to override the default cpu_relax() within CMPXCHG_LOOP.
+ * This is useful for architectures with an expensive cpu_relax().
+ */
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
 /*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
                if (likely(old.lock_count == prev.lock_count)) {                \
                        SUCCESS;                                                \
                }                                                               \
-               cpu_relax();                                                    \
+               arch_mutex_cpu_relax();                                         \
        }                                                                       \
 } while (0)