riscv: Add Zawrs support for spinlocks
author    Christoph Müllner <christoph.muellner@vrull.eu>
          Fri, 26 Apr 2024 10:08:23 +0000 (12:08 +0200)
committer Palmer Dabbelt <palmer@rivosinc.com>
          Fri, 12 Jul 2024 10:16:42 +0000 (03:16 -0700)
RISC-V code uses the generic ticket lock implementation, which calls
the macros smp_cond_load_relaxed() and smp_cond_load_acquire().
Introduce a RISC-V specific implementation of smp_cond_load_relaxed()
which applies WRS.NTO of the Zawrs extension in order to reduce power
consumption while waiting and allows hypervisors to enable guests to
trap while waiting. smp_cond_load_acquire() doesn't need a RISC-V
specific implementation as the generic implementation is based on
smp_cond_load_relaxed() and smp_acquire__after_ctrl_dep() sufficiently
provides the acquire semantics.
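For reference, the generic smp_cond_load_acquire() in include/asm-generic/barrier.h
is built on top of the relaxed variant roughly as follows (paraphrased here for
context, not part of this patch):

    #define smp_cond_load_acquire(ptr, cond_expr) ({                \
            __unqual_scalar_typeof(*ptr) _val;                      \
            /* spin (or wrs.nto) until cond_expr is true */         \
            _val = smp_cond_load_relaxed(ptr, cond_expr);           \
            /* upgrade the final load to acquire ordering */        \
            smp_acquire__after_ctrl_dep();                          \
            (typeof(*ptr))_val;                                     \
    })

so overriding only smp_cond_load_relaxed() is enough for both.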

This implementation is heavily based on Arm's approach, which is also
what Andrea Parri suggested.

The Zawrs specification can be found here:
https://github.com/riscv/riscv-zawrs/blob/main/zawrs.adoc

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
Co-developed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20240426100820.14762-11-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/Kconfig
arch/riscv/include/asm/barrier.h
arch/riscv/include/asm/cmpxchg.h
arch/riscv/include/asm/hwcap.h
arch/riscv/include/asm/insn-def.h
arch/riscv/kernel/cpufeature.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 7427d808833717617662cc41edbeb970ce4a3b04..34bbe6b70546b6e65265e706b4a52ea4ef181a7d 100644
@@ -578,6 +578,19 @@ config RISCV_ISA_V_PREEMPTIVE
          preemption. Enabling this config will result in higher memory
          consumption due to the allocation of per-task's kernel Vector context.
 
+config RISCV_ISA_ZAWRS
+       bool "Zawrs extension support for more efficient busy waiting"
+       depends on RISCV_ALTERNATIVE
+       default y
+       help
+         The Zawrs extension defines instructions to be used in polling loops
+         which allow a hart to enter a low-power state or to trap to the
+         hypervisor while waiting on a store to a memory location. Enable the
+         use of these instructions in the kernel when the Zawrs extension is
+         detected at boot.
+
+         If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_ZBB
        bool
        default y
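As the help text above notes, the new instructions are only used when the
extension is detected at boot. A minimal sketch of how code could gate on both
the Kconfig option and the runtime detection, using the existing
riscv_has_extension_likely() helper (illustrative only; can_use_wrs_nto() is a
made-up name, not from the patch):

    #include <linux/types.h>
    #include <asm/cpufeature.h>
    #include <asm/hwcap.h>

    /* Illustrative: true when the kernel was built with Zawrs support
     * and the running system advertises the extension. */
    static inline bool can_use_wrs_nto(void)
    {
            return IS_ENABLED(CONFIG_RISCV_ISA_ZAWRS) &&
                   riscv_has_extension_likely(RISCV_ISA_EXT_ZAWRS);
    }

In the patch itself the same decision is made by ALTERNATIVE() directly inside
__cmpwait() (see the cmpxchg.h hunk below).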
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 880b56d8480d19cb8038c366988a7369eeeaff4a..e1d9bf1deca6852e962816af066c13f29c734231 100644
@@ -11,6 +11,7 @@
 #define _ASM_RISCV_BARRIER_H
 
 #ifndef __ASSEMBLY__
+#include <asm/cmpxchg.h>
 #include <asm/fence.h>
 
 #define nop()          __asm__ __volatile__ ("nop")
 #define __smp_rmb()    RISCV_FENCE(r, r)
 #define __smp_wmb()    RISCV_FENCE(w, w)
 
-#define __smp_store_release(p, v)                                      \
-do {                                                                   \
-       compiletime_assert_atomic_type(*p);                             \
-       RISCV_FENCE(rw, w);                                             \
-       WRITE_ONCE(*p, v);                                              \
-} while (0)
-
-#define __smp_load_acquire(p)                                          \
-({                                                                     \
-       typeof(*p) ___p1 = READ_ONCE(*p);                               \
-       compiletime_assert_atomic_type(*p);                             \
-       RISCV_FENCE(r, rw);                                             \
-       ___p1;                                                          \
-})
-
 /*
  * This is a very specific barrier: it's currently only used in two places in
  * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
@@ -70,6 +56,35 @@ do {                                                                 \
  */
 #define smp_mb__after_spinlock()       RISCV_FENCE(iorw, iorw)
 
+#define __smp_store_release(p, v)                                      \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       RISCV_FENCE(rw, w);                                             \
+       WRITE_ONCE(*p, v);                                              \
+} while (0)
+
+#define __smp_load_acquire(p)                                          \
+({                                                                     \
+       typeof(*p) ___p1 = READ_ONCE(*p);                               \
+       compiletime_assert_atomic_type(*p);                             \
+       RISCV_FENCE(r, rw);                                             \
+       ___p1;                                                          \
+})
+
+#ifdef CONFIG_RISCV_ISA_ZAWRS
+#define smp_cond_load_relaxed(ptr, cond_expr) ({                       \
+       typeof(ptr) __PTR = (ptr);                                      \
+       __unqual_scalar_typeof(*ptr) VAL;                               \
+       for (;;) {                                                      \
+               VAL = READ_ONCE(*__PTR);                                \
+               if (cond_expr)                                          \
+                       break;                                          \
+               __cmpwait_relaxed(ptr, VAL);                            \
+       }                                                               \
+       (typeof(*ptr))VAL;                                              \
+})
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
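With the above in place, any smp_cond_load_relaxed()/smp_cond_load_acquire()
caller, including the generic ticket spinlock, picks up the wrs.nto path
automatically. A hypothetical caller for illustration (wait_for_flag and flag
are made-up names, not from the patch):

    #include <linux/types.h>
    #include <asm/barrier.h>

    /* Hypothetical: wait until *flag becomes non-zero, with acquire
     * ordering on the value that satisfied the condition. On Zawrs
     * hardware each failed check ends in wrs.nto instead of spinning
     * flat out on the cache line. */
    static inline u32 wait_for_flag(u32 *flag)
    {
            return smp_cond_load_acquire(flag, VAL != 0);
    }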
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 2fee65cc8443246c07ca1f2c53e896cad426ae77..725276dcb99602e8f90542a793f3f1a2d1e55ba4 100644
@@ -8,7 +8,10 @@
 
 #include <linux/bug.h>
 
+#include <asm/alternative-macros.h>
 #include <asm/fence.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>
 
 #define __xchg_relaxed(ptr, new, size)                                 \
 ({                                                                     \
        arch_cmpxchg_relaxed((ptr), (o), (n));                          \
 })
 
+#ifdef CONFIG_RISCV_ISA_ZAWRS
+/*
+ * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
+ * @val we expect it to still terminate within a "reasonable" amount of time
+ * for an implementation-specific other reason, a pending, locally-enabled
+ * interrupt, or because it has been configured to raise an illegal
+ * instruction exception.
+ */
+static __always_inline void __cmpwait(volatile void *ptr,
+                                     unsigned long val,
+                                     int size)
+{
+       unsigned long tmp;
+
+       asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
+                            0, RISCV_ISA_EXT_ZAWRS, 1)
+                : : : : no_zawrs);
+
+       switch (size) {
+       case 4:
+               asm volatile(
+               "       lr.w    %0, %1\n"
+               "       xor     %0, %0, %2\n"
+               "       bnez    %0, 1f\n"
+                       ZAWRS_WRS_NTO "\n"
+               "1:"
+               : "=&r" (tmp), "+A" (*(u32 *)ptr)
+               : "r" (val));
+               break;
+#if __riscv_xlen == 64
+       case 8:
+               asm volatile(
+               "       lr.d    %0, %1\n"
+               "       xor     %0, %0, %2\n"
+               "       bnez    %0, 1f\n"
+                       ZAWRS_WRS_NTO "\n"
+               "1:"
+               : "=&r" (tmp), "+A" (*(u64 *)ptr)
+               : "r" (val));
+               break;
+#endif
+       default:
+               BUILD_BUG();
+       }
+
+       return;
+
+no_zawrs:
+       asm volatile(RISCV_PAUSE : : : "memory");
+}
+
+#define __cmpwait_relaxed(ptr, val) \
+       __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+#endif
+
 #endif /* _ASM_RISCV_CMPXCHG_H */
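The control flow of __cmpwait() above, rewritten as plain C for clarity. This
is a sketch only: zawrs_available and wrs_nto() are hypothetical stand-ins for
the ALTERNATIVE() patch site and the wrs.nto instruction, and a real
implementation must use inline asm because the load and the reservation have to
come from the same lr.w/lr.d:

    #include <linux/types.h>
    #include <linux/compiler.h>
    #include <asm/processor.h>

    /* Hypothetical stand-ins, not real kernel symbols: */
    extern bool zawrs_available;        /* models the ALTERNATIVE() patch site */
    extern void wrs_nto(void);          /* models the wrs.nto instruction */

    static inline void cmpwait_sketch(volatile u32 *ptr, u32 val)
    {
            if (!zawrs_available) {
                    cpu_relax();        /* RISCV_PAUSE fallback */
                    return;
            }

            /* In the real code, lr.w reads *ptr and registers a reservation. */
            if (READ_ONCE(*ptr) != val)
                    return;             /* already changed, nothing to wait for */

            /* Stall until the reserved location is written, an interrupt
             * becomes pending, or the hart resumes for its own reasons. */
            wrs_nto();
    }

Either way the caller (smp_cond_load_relaxed()) re-reads the value and
re-evaluates the condition, so a spurious wake-up is harmless.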
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e17d0078a65116810cda686225c78e0ff420a603..5b358c3cf212fd13e54e4a333453f2f7260504cf 100644
@@ -81,6 +81,7 @@
 #define RISCV_ISA_EXT_ZTSO             72
 #define RISCV_ISA_EXT_ZACAS            73
 #define RISCV_ISA_EXT_XANDESPMU                74
+#define RISCV_ISA_EXT_ZAWRS            75
 
 #define RISCV_ISA_EXT_XLINUXENVCFG     127
 
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
index 64dffaa21bfa3d66139a522e78c38015b6f3291b..9a913010cdd93cdfdd93f467e7880e20cce0dd2b 100644
               RS1(base), SIMM12(4))
 
 #define RISCV_PAUSE    ".4byte 0x100000f"
+#define ZAWRS_WRS_NTO  ".4byte 0x00d00073"
+#define ZAWRS_WRS_STO  ".4byte 0x01d00073"
 
 #endif /* __ASM_INSN_DEF_H */
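Both constants are SYSTEM-opcode encodings with rd=x0, rs1=x0 and funct3=0, so
only the funct12 field differs; emitting them as raw words (like RISCV_PAUSE
above) keeps the kernel buildable with assemblers that do not yet know the
Zawrs mnemonics. A small decomposition sketch (the SYSTEM_FUNCT12 macro is
illustrative, not from the patch):

    /* funct12 occupies bits [31:20]; 0x73 is the SYSTEM major opcode. */
    #define SYSTEM_FUNCT12(funct12)     (((funct12) << 20) | 0x73)

    /* SYSTEM_FUNCT12(0x00d) == 0x00d00073  ->  wrs.nto */
    /* SYSTEM_FUNCT12(0x01d) == 0x01d00073  ->  wrs.sto */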
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 3ed2359eae353f863561c02983e4e5e5bf27603d..02de9eaa3f4210e037d3a9aa96222db1346f509e 100644
@@ -257,6 +257,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
        __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
        __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
        __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
+       __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
        __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
        __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
        __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),