The core atomic code has a number of macros where it elaborates
architecture primitives into more functions. ARC uses
arch_atomic64_cmpxchg() as its architecture primitive, which disables a lot
of the additional functions.
Instead, provide arch_cmpxchg64_relaxed() as the primitive and rely on the
core macros to create arch_cmpxchg64().
The macros will also provide other functions, for instance,
try_cmpxchg64_release(), giving a more complete implementation.
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/Z0747n5bSep4_1VX@J2N7QTR9R3
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Vineet Gupta <vgupta@kernel.org>
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-static inline s64
-arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+static inline u64 __arch_cmpxchg64_relaxed(volatile void *ptr, u64 old, u64 new)
 {
-       s64 prev;
-
-       smp_mb();
+       u64 prev;
 
        __asm__ __volatile__(
        "1:     llockd  %0, [%1]        \n"
        "       bnz     1b              \n"
        "2:                             \n"
        : "=&r"(prev)
-       : "r"(ptr), "ir"(expected), "r"(new)
-       : "cc");        /* memory clobber comes from smp_mb() */
-
-       smp_mb();
+       : "r"(ptr), "ir"(old), "r"(new)
+       : "memory", "cc");
 
        return prev;
 }
-#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+#define arch_cmpxchg64_relaxed __arch_cmpxchg64_relaxed
 
 static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
 {