unsigned long tmp;                                              \
        u##sz oldval;                                                   \
                                                                        \
+       /*                                                              \
+        * Sub-word sizes require explicit casting so that the compare  \
+        * part of the cmpxchg doesn't end up interpreting non-zero     \
+        * upper bits of the register containing "old".                 \
+        */                                                             \
+       if (sz < 32)                                                    \
+               old = (u##sz)old;                                       \
+                                                                       \
        asm volatile(                                                   \
        "       prfm    pstl1strm, %[v]\n"                              \
        "1:     ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n"          \
 
 
 #define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...)                    \
 static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,      \
-                                             unsigned long old,        \
+                                             u##sz old,                \
                                              u##sz new)                \
 {                                                                      \
        register unsigned long x0 asm ("x0") = (unsigned long)ptr;      \
-       register unsigned long x1 asm ("x1") = old;                     \
+       register u##sz x1 asm ("x1") = old;                             \
        register u##sz x2 asm ("x2") = new;                             \
                                                                        \
        asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
 
 {                                                                      \
        switch (size) {                                                 \
        case 1:                                                         \
-               return __cmpxchg_case##sfx##_8(ptr, (u8)old, new);      \
+               return __cmpxchg_case##sfx##_8(ptr, old, new);          \
        case 2:                                                         \
-               return __cmpxchg_case##sfx##_16(ptr, (u16)old, new);    \
+               return __cmpxchg_case##sfx##_16(ptr, old, new);         \
        case 4:                                                         \
                return __cmpxchg_case##sfx##_32(ptr, old, new);         \
        case 8:                                                         \