this_cpu_cmpxchg: x86: switch this_cpu_cmpxchg to locked, add _local function
authorMarcelo Tosatti <mtosatti@redhat.com>
Mon, 20 Mar 2023 18:03:37 +0000 (15:03 -0300)
committerAndrew Morton <akpm@linux-foundation.org>
Tue, 28 Mar 2023 23:25:08 +0000 (16:25 -0700)
Goal is to have vmstat_shepherd to transfer from per-CPU counters to
global counters remotely.  For this, an atomic this_cpu_cmpxchg is
necessary.

Following the kernel convention for cmpxchg/cmpxchg_local, change x86's
this_cpu_cmpxchg_ helpers to be atomic, and add this_cpu_cmpxchg_local_
helpers which are not atomic.

Link: https://lkml.kernel.org/r/20230320180745.658574087@redhat.com
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Aaron Tomlin <atomlin@atomlin.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: "Russell King (Oracle)" <linux@armlinux.org.uk>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
arch/x86/include/asm/percpu.h

index 13c0d63ed55e428226ecd004dd078ed9d412eb55..bae00203c08f4a7c35b4c4402f1082952cb1782f 100644 (file)
@@ -197,11 +197,11 @@ do {                                                                      \
  * cmpxchg has no such implied lock semantics as a result it is much
  * more efficient for cpu local operations.
  */
-#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval)              \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval, lockp)       \
 ({                                                                     \
        __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval);       \
        __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval);       \
-       asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",               \
+       asm qual (__pcpu_op2_##size(lockp "cmpxchg", "%[nval]",         \
                                    __percpu_arg([var]))                \
                  : [oval] "+a" (pco_old__),                            \
                    [var] "+m" (_var)                                   \
@@ -279,16 +279,20 @@ do {                                                                      \
 #define raw_cpu_add_return_1(pcp, val)         percpu_add_return_op(1, , pcp, val)
 #define raw_cpu_add_return_2(pcp, val)         percpu_add_return_op(2, , pcp, val)
 #define raw_cpu_add_return_4(pcp, val)         percpu_add_return_op(4, , pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval)     percpu_cmpxchg_op(1, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval)     percpu_cmpxchg_op(2, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval)     percpu_cmpxchg_op(4, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval)     percpu_cmpxchg_op(1, , pcp, oval, nval, "")
+#define raw_cpu_cmpxchg_2(pcp, oval, nval)     percpu_cmpxchg_op(2, , pcp, oval, nval, "")
+#define raw_cpu_cmpxchg_4(pcp, oval, nval)     percpu_cmpxchg_op(4, , pcp, oval, nval, "")
 
 #define this_cpu_add_return_1(pcp, val)                percpu_add_return_op(1, volatile, pcp, val)
 #define this_cpu_add_return_2(pcp, val)                percpu_add_return_op(2, volatile, pcp, val)
 #define this_cpu_add_return_4(pcp, val)                percpu_add_return_op(4, volatile, pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval)    percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval)    percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval)    percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_local_1(pcp, oval, nval)      percpu_cmpxchg_op(1, volatile, pcp, oval, nval, "")
+#define this_cpu_cmpxchg_local_2(pcp, oval, nval)      percpu_cmpxchg_op(2, volatile, pcp, oval, nval, "")
+#define this_cpu_cmpxchg_local_4(pcp, oval, nval)      percpu_cmpxchg_op(4, volatile, pcp, oval, nval, "")
+
+#define this_cpu_cmpxchg_1(pcp, oval, nval)    percpu_cmpxchg_op(1, volatile, pcp, oval, nval, LOCK_PREFIX)
+#define this_cpu_cmpxchg_2(pcp, oval, nval)    percpu_cmpxchg_op(2, volatile, pcp, oval, nval, LOCK_PREFIX)
+#define this_cpu_cmpxchg_4(pcp, oval, nval)    percpu_cmpxchg_op(4, volatile, pcp, oval, nval, LOCK_PREFIX)
 
 #ifdef CONFIG_X86_CMPXCHG64
 #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)            \
@@ -319,16 +323,17 @@ do {                                                                      \
 #define raw_cpu_or_8(pcp, val)                 percpu_to_op(8, , "or", (pcp), val)
 #define raw_cpu_add_return_8(pcp, val)         percpu_add_return_op(8, , pcp, val)
 #define raw_cpu_xchg_8(pcp, nval)              raw_percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval)     percpu_cmpxchg_op(8, , pcp, oval, nval)
-
-#define this_cpu_read_8(pcp)                   percpu_from_op(8, volatile, "mov", pcp)
-#define this_cpu_write_8(pcp, val)             percpu_to_op(8, volatile, "mov", (pcp), val)
-#define this_cpu_add_8(pcp, val)               percpu_add_op(8, volatile, (pcp), val)
-#define this_cpu_and_8(pcp, val)               percpu_to_op(8, volatile, "and", (pcp), val)
-#define this_cpu_or_8(pcp, val)                        percpu_to_op(8, volatile, "or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val)                percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval)             percpu_xchg_op(8, volatile, pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval)     percpu_cmpxchg_op(8, , pcp, oval, nval, "")
+
+#define this_cpu_read_8(pcp)                           percpu_from_op(8, volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val)                     percpu_to_op(8, volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val)                       percpu_add_op(8, volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val)                       percpu_to_op(8, volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val)                                percpu_to_op(8, volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val)                        percpu_add_return_op(8, volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval)                     percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_cmpxchg_local_8(pcp, oval, nval)      percpu_cmpxchg_op(8, volatile, pcp, oval, nval, "")
+#define this_cpu_cmpxchg_8(pcp, oval, nval)            percpu_cmpxchg_op(8, volatile, pcp, oval, nval, LOCK_PREFIX)
 
 /*
  * Pretty complex macro to generate cmpxchg16 instruction.  The instruction