        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "\n"                              \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w0, %2\n"                                              \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      stxr    %w1, %w0, %2\n"                                         \
        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "_return\n"                       \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w0, %2\n"                                              \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      stlxr   %w1, %w0, %2\n"                                         \
        int oldval;
 
        asm volatile("// atomic_cmpxchg\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      eor     %w0, %w1, %w3\n"
 "      cbnz    %w0, 2f\n"
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "\n"                            \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %0, %2\n"                                               \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      stxr    %w1, %0, %2\n"                                          \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "_return\n"                     \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %0, %2\n"                                               \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      stlxr   %w1, %0, %2\n"                                          \
        unsigned long res;
 
        asm volatile("// atomic64_cmpxchg\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %1, %2\n"
 "      eor     %0, %1, %3\n"
 "      cbnz    %w0, 2f\n"
        unsigned long tmp;
 
        asm volatile("// atomic64_dec_if_positive\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %0, %2\n"
 "      subs    %0, %0, #1\n"
 "      b.mi    2f\n"
        unsigned long tmp, oldval;                                      \
                                                                        \
        asm volatile(                                                   \
+       "       prfm    pstl1strm, %2\n"                                \
        "1:     ldxr" #sz "\t%" #w "[oldval], %[v]\n"                   \
        "       eor     %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"  \
        "       cbnz    %" #w "[tmp], 2f\n"                             \
        unsigned long tmp, ret;                                         \
                                                                        \
        asm volatile("// __cmpxchg_double" #name "\n"                   \
+       "       prfm    pstl1strm, %2\n"                                \
        "1:     ldxp    %0, %1, %2\n"                                   \
        "       eor     %0, %0, %3\n"                                   \
        "       eor     %1, %1, %4\n"                                   \
 
        case 1:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxrb   %w0, %2\n"
                "       stlxrb  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpalb  %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
        case 2:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxrh   %w0, %2\n"
                "       stlxrh  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpalh  %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
        case 4:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxr    %w0, %2\n"
                "       stlxr   %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpal   %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
        case 8:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxr    %0, %2\n"
                "       stlxr   %w1, %3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpal   %3, %0, %2\n"
                "       nop\n"
                "       nop")
 
        asm volatile(                                                   \
        ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,            \
                    CONFIG_ARM64_PAN)                                   \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w1, %2\n"                                              \
        insn "\n"                                                       \
 "2:    stlxr   %w3, %w0, %2\n"                                         \
                return -EFAULT;
 
        asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      sub     %w3, %w1, %w4\n"
 "      cbnz    %w3, 3f\n"
 
        eor     w0, w0, w3              // Clear low bits
        mov     x2, #1
        add     x1, x1, x0, lsr #3      // Get word offset
+alt_lse "      prfm    pstl1strm, [x1]",       "nop"
        lsl     x3, x2, x3              // Create mask
 
 alt_lse        "1:     ldxr    x2, [x1]",              "\lse   x3, [x1]"
        eor     w0, w0, w3              // Clear low bits
        mov     x2, #1
        add     x1, x1, x0, lsr #3      // Get word offset
+alt_lse "      prfm    pstl1strm, [x1]",       "nop"
        lsl     x4, x2, x3              // Create mask
 
 alt_lse        "1:     ldxr    x2, [x1]",              "\lse   x4, x2, [x1]"
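
Both bitops.S macros turn a bit number into the address of the 64-bit word that holds it plus a single-bit mask before entering the LL/SC (or LSE) sequence; testop builds its mask in x4 because x3 must keep the bit offset for extracting the returned bit afterwards. The address and mask arithmetic, modeled in C (a sketch; my_set_bit is not the kernel's set_bit):

#include <stdint.h>

static inline void my_set_bit(unsigned int nr, uint64_t *addr)
{
	uint64_t mask = UINT64_C(1) << (nr & 63);	/* lsl x3, x2, x3 */
	uint64_t *word = addr + (nr >> 6);		/* add x1, x1, x0, lsr #3
							 * after clearing low bits */

	/* Plain set_bit is unordered, hence relaxed here. */
	__atomic_fetch_or(word, mask, __ATOMIC_RELAXED);
}
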