From 4f2a0b765c9731d2fa94e209ee9ae0e96b280f17 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:41 +0100 Subject: [PATCH 01/16] include/linux/sizes.h: Cover all possible x86 CPU cache sizes Add size macros for 24/192/384 Kilobytes and 3/6/12/18/24 Megabytes. With that, the x86 subsystem can avoid locally defining its own macros for CPU cache sizes. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20250304085152.51092-31-darwi@linutronix.de --- include/linux/sizes.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/sizes.h b/include/linux/sizes.h index c3a00b967d18..49039494076f 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -23,17 +23,25 @@ #define SZ_4K 0x00001000 #define SZ_8K 0x00002000 #define SZ_16K 0x00004000 +#define SZ_24K 0x00006000 #define SZ_32K 0x00008000 #define SZ_64K 0x00010000 #define SZ_128K 0x00020000 +#define SZ_192K 0x00030000 #define SZ_256K 0x00040000 +#define SZ_384K 0x00060000 #define SZ_512K 0x00080000 #define SZ_1M 0x00100000 #define SZ_2M 0x00200000 +#define SZ_3M 0x00300000 #define SZ_4M 0x00400000 +#define SZ_6M 0x00600000 #define SZ_8M 0x00800000 +#define SZ_12M 0x00c00000 #define SZ_16M 0x01000000 +#define SZ_18M 0x01200000 +#define SZ_24M 0x01800000 #define SZ_32M 0x02000000 #define SZ_64M 0x04000000 #define SZ_128M 0x08000000 -- 2.51.0 From e1c49eaee52384ec9e3734138b3929a35b64e0c3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 2 Mar 2025 17:20:59 -0800 Subject: [PATCH 02/16] KVM: VMX: Use named operands in inline asm Convert the non-asm-goto version of the inline asm in __vmcs_readl() to use named operands, similar to its asm-goto version. Do this in preparation of changing the ASM_CALL_CONSTRAINT primitive. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Sean Christopherson Cc: linux-kernel@vger.kernel.org --- arch/x86/kvm/vmx/vmx_ops.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 633c87e2fd92..96677576c836 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -118,7 +118,7 @@ do_exception: #else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ - asm volatile("1: vmread %2, %1\n\t" + asm volatile("1: vmread %[field], %[output]\n\t" ".byte 0x3e\n\t" /* branch taken hint */ "ja 3f\n\t" @@ -127,24 +127,26 @@ do_exception: * @field, and bounce through the trampoline to preserve * volatile registers. */ - "xorl %k1, %k1\n\t" + "xorl %k[output], %k[output]\n\t" "2:\n\t" - "push %1\n\t" - "push %2\n\t" + "push %[output]\n\t" + "push %[field]\n\t" "call vmread_error_trampoline\n\t" /* * Unwind the stack. Note, the trampoline zeros out the * memory for @fault so that the result is '0' on error. */ - "pop %2\n\t" - "pop %1\n\t" + "pop %[field]\n\t" + "pop %[output]\n\t" "3:\n\t" /* VMREAD faulted. As above, except push '1' for @fault.
*/ - _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %1) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[output]) - : ASM_CALL_CONSTRAINT, "=&r"(value) : "r"(field) : "cc"); + : ASM_CALL_CONSTRAINT, [output] "=&r" (value) + : [field] "r" (field) + : "cc"); return value; #endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ -- 2.51.0 From 9064a8e556fa70ccfd9c414350406ed4887d3059 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 2 Mar 2025 17:21:00 -0800 Subject: [PATCH 03/16] x86/hyperv: Use named operands in inline asm Use named operands in inline asm to make it easier to change the constraint order. Do this in preparation of changing the ASM_CALL_CONSTRAINT primitive. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Wei Liu Cc: Dexuan Cui Cc: linux-kernel@vger.kernel.org --- arch/x86/include/asm/mshyperv.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f91ab1e75f9f..5e6193dbc97e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -77,11 +77,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_tdx_hypercall(control, input_address, output_address); if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[output_address], %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) - : "r" (output_address) + : [output_address] "r" (output_address) : "cc", "memory", "r8", "r9", "r10", "r11"); return hv_status; } @@ -89,12 +89,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) if (!hv_hypercall_pg) return U64_MAX; - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[output_address], %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) - : "r" (output_address), - THUNK_TARGET(hv_hypercall_pg) + : [output_address] "r" (output_address), + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); #else u32 input_address_hi = upper_32_bits(input_address); @@ -187,18 +187,18 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) return hv_tdx_hypercall(control, input1, input2); if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[input2], %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) - : "r" (input2) + : [input2] "r" (input2) : "cc", "r8", "r9", "r10", "r11"); } else { - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[input2], %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) - : "r" (input2), + : [input2] "r" (input2), THUNK_TARGET(hv_hypercall_pg) : "cc", "r8", "r9", "r10", "r11"); } -- 2.51.0 From 224788b63a2e426b6b82c76456a068a2ab87610f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 2 Mar 2025 17:21:01 -0800 Subject: [PATCH 04/16] x86/alternatives: Simplify alternative_call() interface Separate the input from the clobbers in preparation for appending the input. Do this in preparation of changing the ASM_CALL_CONSTRAINT primitive. 
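For illustration only (an expansion sketch; old_fn, new_fn, X86_FEATURE_FOO and ret are placeholder names, not part of this patch): the new COMMA() helper expands to ", args" when arguments are given and to nothing when they are empty, which is what lets callers pass the output, input and clobber lists as three distinct macro arguments:

	alternative_call(old_fn, new_fn, X86_FEATURE_FOO,
			 "=a" (ret),		/* output */
			 /* no input */,
			 "memory");		/* clobbers */

With an empty input list, COMMA() emits no comma, so the asm constraint sections degenerate cleanly to just the [old]/[new] operands followed by the clobbers.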
Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: linux-kernel@vger.kernel.org --- arch/x86/include/asm/alternative.h | 24 +++----- arch/x86/include/asm/apic.h | 4 +- arch/x86/include/asm/asm.h | 11 ++++ arch/x86/include/asm/atomic64_32.h | 96 ++++++++++++++++++------------ arch/x86/include/asm/page_64.h | 4 +- 5 files changed, 82 insertions(+), 57 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index a2141665239b..52626a7251e6 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -237,10 +237,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * references: i.e., if used for a function, it would add the PLT * suffix. */ -#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ +#define alternative_call(oldfunc, newfunc, ft_flags, output, input, clobbers...) \ asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ : ALT_OUTPUT_SP(output) \ - : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + : [old] "i" (oldfunc), [new] "i" (newfunc) \ + COMMA(input) \ + : clobbers) /* * Like alternative_call, but there are two features and respective functions. @@ -249,24 +251,14 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, old function is used. */ #define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ - output, input...) \ + output, input, clobbers...) \ asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ "call %c[new2]", ft_flags2) \ : ALT_OUTPUT_SP(output) \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input) - -/* - * use this macro(s) if you need more than one output parameter - * in alternative_io - */ -#define ASM_OUTPUT2(a...) a - -/* - * use this macro if you need clobbers but no inputs in - * alternative_{input,io,call}() - */ -#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr + [new2] "i" (newfunc2) \ + COMMA(input) \ + : clobbers) #define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f21ff1932699..c903d358405d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -99,8 +99,8 @@ static inline void native_apic_mem_write(u32 reg, u32 v) volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, - ASM_OUTPUT2("=r" (v), "=m" (*addr)), - ASM_OUTPUT2("0" (v), "m" (*addr))); + ASM_OUTPUT("=r" (v), "=m" (*addr)), + ASM_INPUT("0" (v), "m" (*addr))); } static inline u32 native_apic_mem_read(u32 reg) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 2bec0c89a95c..975ae7a9397e 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -213,6 +213,17 @@ static __always_inline __pure void *rip_rel_ptr(void *p) /* For C file, we already have NOKPROBE_SYMBOL macro */ +/* Insert a comma if args are non-empty */ +#define COMMA(x...) __COMMA(x) +#define __COMMA(...) , ##__VA_ARGS__ + +/* + * Combine multiple asm inline constraint args into a single arg for passing to + * another macro. + */ +#define ASM_OUTPUT(x...) x +#define ASM_INPUT(x...) x + /* * This output constraint should be used for any inline asm which has a "call" * instruction. 
Otherwise the asm may be inserted before the frame pointer diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 797085ecaaa4..ab838205c1c6 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -49,16 +49,19 @@ static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v) #endif #ifdef CONFIG_X86_CX8 -#define __alternative_atomic64(f, g, out, in...) \ - asm volatile("call %c[func]" \ +#define __alternative_atomic64(f, g, out, in, clobbers...) \ + asm volatile("call %c[func]" \ : ALT_OUTPUT_SP(out) \ - : [func] "i" (atomic64_##g##_cx8), ## in) + : [func] "i" (atomic64_##g##_cx8) \ + COMMA(in) \ + : clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) #else -#define __alternative_atomic64(f, g, out, in...) \ - alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ - X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in) +#define __alternative_atomic64(f, g, out, in, clobbers...) \ + alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ + X86_FEATURE_CX8, ASM_OUTPUT(out), \ + ASM_INPUT(in), clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \ ATOMIC64_DECL_ONE(sym##_386) @@ -69,8 +72,8 @@ ATOMIC64_DECL_ONE(inc_386); ATOMIC64_DECL_ONE(dec_386); #endif -#define alternative_atomic64(f, out, in...) \ - __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in) +#define alternative_atomic64(f, out, in, clobbers...) \ + __alternative_atomic64(f, f, ASM_OUTPUT(out), ASM_INPUT(in), clobbers) ATOMIC64_DECL(read); ATOMIC64_DECL(set); @@ -105,9 +108,10 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) s64 o; unsigned high = (unsigned)(n >> 32); unsigned low = (unsigned)n; - alternative_atomic64(xchg, "=&A" (o), - "S" (v), "b" (low), "c" (high) - : "memory"); + alternative_atomic64(xchg, + "=&A" (o), + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "memory"); return o; } #define arch_atomic64_xchg arch_atomic64_xchg @@ -116,23 +120,25 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; - alternative_atomic64(set, /* no output */, - "S" (v), "b" (low), "c" (high) - : "eax", "edx", "memory"); + alternative_atomic64(set, + /* no output */, + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "eax", "edx", "memory"); } static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { s64 r; - alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); + alternative_atomic64(read, "=&A" (r), "c" (v), "memory"); return r; } static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } #define arch_atomic64_add_return arch_atomic64_add_return @@ -140,8 +146,9 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } #define arch_atomic64_sub_return arch_atomic64_sub_return @@ -149,8 +156,10 @@ static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { s64 a; - alternative_atomic64(inc_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + 
alternative_atomic64(inc_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_inc_return arch_atomic64_inc_return @@ -158,8 +167,10 @@ static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { s64 a; - alternative_atomic64(dec_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + alternative_atomic64(dec_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_dec_return arch_atomic64_dec_return @@ -167,28 +178,34 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } static __always_inline void arch_atomic64_inc(atomic64_t *v) { - __alternative_atomic64(inc, inc_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(inc, inc_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_inc arch_atomic64_inc static __always_inline void arch_atomic64_dec(atomic64_t *v) { - __alternative_atomic64(dec, dec_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(dec, dec_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_dec arch_atomic64_dec @@ -197,8 +214,9 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); alternative_atomic64(add_unless, - ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), - "S" (v) : "memory"); + ASM_OUTPUT("+A" (a), "+c" (low), "+D" (high)), + "S" (v), + "memory"); return (int)a; } #define arch_atomic64_add_unless arch_atomic64_add_unless @@ -206,8 +224,10 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; - alternative_atomic64(inc_not_zero, "=&a" (r), - "S" (v) : "ecx", "edx", "memory"); + alternative_atomic64(inc_not_zero, + "=&a" (r), + "S" (v), + "ecx", "edx", "memory"); return r; } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero @@ -215,8 +235,10 @@ static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 r; - alternative_atomic64(dec_if_positive, "=&A" (r), - "S" (v) : "ecx", "memory"); + alternative_atomic64(dec_if_positive, + "=&A" (r), + "S" (v), + "ecx", "memory"); return r; } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index d63576608ce7..d081e8000f34 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -55,8 +55,8 @@ static inline void clear_page(void *page) clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), - "D" (page) - : "cc", "memory", "rax", "rcx"); + "D" (page), + "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); -- 2.51.0 From 0ec914707c3ed052ed26eb88f9300109030a7fb2 Mon Sep 
17 00:00:00 2001 From: Uros Bizjak Date: Mon, 3 Mar 2025 16:54:21 +0100 Subject: [PATCH 05/16] x86/irq/32: Use named operands in inline asm Also use inout "+" constraint modifier where appropriate. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303155446.112769-1-ubizjak@gmail.com --- arch/x86/kernel/irq_32.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index dc1049c01f9b..c4719c40252f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -54,12 +54,11 @@ static inline void print_stack_overflow(void) { } static void call_on_stack(void *func, void *stack) { - asm volatile("xchgl %%ebx,%%esp \n" + asm volatile("xchgl %[sp], %%esp\n" CALL_NOSPEC - "movl %%ebx,%%esp \n" - : "=b" (stack) - : "0" (stack), - [thunk_target] "D"(func) + "movl %[sp], %%esp" + : [sp] "+b" (stack) + : [thunk_target] "D" (func) : "memory", "cc", "edx", "ecx", "eax"); } @@ -71,7 +70,7 @@ static inline void *current_stack(void) static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) { struct irq_stack *curstk, *irqstk; - u32 *isp, *prev_esp, arg1; + u32 *isp, *prev_esp; curstk = (struct irq_stack *) current_stack(); irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr); @@ -94,12 +93,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); - asm volatile("xchgl %%ebx,%%esp \n" + asm volatile("xchgl %[sp], %%esp\n" CALL_NOSPEC - "movl %%ebx,%%esp \n" - : "=a" (arg1), "=b" (isp) - : "0" (desc), "1" (isp), - [thunk_target] "D" (desc->handle_irq) + "movl %[sp], %%esp" + : "+a" (desc), [sp] "+b" (isp) + : [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; } -- 2.51.0 From 76f71137811a6dfa52b3e22a86a772e5753021d3 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 3 Mar 2025 16:54:22 +0100 Subject: [PATCH 06/16] x86/irq/32: Add missing clobber to inline asm i386 ABI declares %edx as a call-clobbered register. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303155446.112769-2-ubizjak@gmail.com --- arch/x86/kernel/irq_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index c4719c40252f..eab458009f97 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -98,7 +98,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) "movl %[sp], %%esp" : "+a" (desc), [sp] "+b" (isp) : [thunk_target] "D" (desc->handle_irq) - : "memory", "cc", "ecx"); + : "memory", "cc", "edx", "ecx"); return 1; } -- 2.51.0 From d4432fb5b8798a7663974bed277a8a6e330a50d8 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 3 Mar 2025 16:54:24 +0100 Subject: [PATCH 07/16] x86/irq/32: Use current_stack_pointer to avoid asm() in check_stack_overflow() Make code more readable by using the 'current_stack_pointer' global variable. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303155446.112769-4-ubizjak@gmail.com --- arch/x86/kernel/irq_32.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index eab458009f97..8c7babbcf6b7 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -31,10 +31,7 @@ int sysctl_panic_on_stackoverflow __read_mostly; /* Debugging check for stack overflow: is there less than 1KB free? */ static int check_stack_overflow(void) { - long sp; - - __asm__ __volatile__("andl %%esp,%0" : - "=r" (sp) : "0" (THREAD_SIZE - 1)); + unsigned long sp = current_stack_pointer & (THREAD_SIZE - 1); return sp < (sizeof(struct thread_info) + STACK_WARN); } -- 2.51.0 From c8b584fe82d0f1e478a598f954943b095a4a8f5c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 3 Mar 2025 16:54:25 +0100 Subject: [PATCH 08/16] x86/irq/32: Change some static functions to bool The return values of these functions are 0/1, but they use an int type instead of bool: check_stack_overflow() execute_on_irq_stack() Change the type of these functions to bool and adjust their return values and affected helper variables. [ mingo: Rewrote the changelog ] Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303155446.112769-5-ubizjak@gmail.com --- arch/x86/kernel/irq_32.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 8c7babbcf6b7..d301208d35d0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -29,7 +29,7 @@ int sysctl_panic_on_stackoverflow __read_mostly; /* Debugging check for stack overflow: is there less than 1KB free?
*/ -static int check_stack_overflow(void) +static bool check_stack_overflow(void) { unsigned long sp = current_stack_pointer & (THREAD_SIZE - 1); @@ -45,7 +45,7 @@ static void print_stack_overflow(void) } #else -static inline int check_stack_overflow(void) { return 0; } +static inline bool check_stack_overflow(void) { return false; } static inline void print_stack_overflow(void) { } #endif @@ -64,7 +64,7 @@ static inline void *current_stack(void) return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } -static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) +static inline bool execute_on_irq_stack(bool overflow, struct irq_desc *desc) { struct irq_stack *curstk, *irqstk; u32 *isp, *prev_esp; @@ -79,7 +79,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) * current stack (which is the irq stack already after all) */ if (unlikely(curstk == irqstk)) - return 0; + return false; isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); @@ -96,7 +96,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) : "+a" (desc), [sp] "+b" (isp) : [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "edx", "ecx"); - return 1; + return true; } /* @@ -145,7 +145,7 @@ void do_softirq_own_stack(void) void __handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - int overflow = check_stack_overflow(); + bool overflow = check_stack_overflow(); if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) { if (unlikely(overflow)) -- 2.51.0 From ab2bb9c084f7e3b641ceba91efc33b3adcd1846e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:36 -0500 Subject: [PATCH 09/16] percpu: Introduce percpu hot section Add a subsection to the percpu data for frequently accessed variables that should remain cached on each processor. These variables should not be accessed from other processors to avoid cacheline bouncing. This will replace the pcpu_hot struct on x86, and open up similar functionality to other architectures and the kernel core. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-2-brgerst@gmail.com --- include/asm-generic/vmlinux.lds.h | 11 +++++++++++ include/linux/percpu-defs.h | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b32e453acf9d..c4e8fac50254 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -385,6 +385,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) . = ALIGN(PAGE_SIZE); \ __nosave_end = .; +#define CACHE_HOT_DATA(align) \ . = ALIGN(align); \ *(SORT_BY_ALIGNMENT(.data..hot.*)) \ . = ALIGN(align); + #define PAGE_ALIGNED_DATA(page_align) \ . = ALIGN(page_align); \ *(.data..page_aligned) \ @@ -1065,6 +1070,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . = ALIGN(cacheline); \ + __per_cpu_hot_start = .; \ + *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \ + __per_cpu_hot_pad = .; \ + . = ALIGN(cacheline); \ + __per_cpu_hot_end = .; \ *(.data..percpu..read_mostly) \ .
= ALIGN(cacheline); \ *(.data..percpu) \ @@ -1112,6 +1122,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) INIT_TASK_DATA(inittask) \ NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ + CACHE_HOT_DATA(cacheline) \ CACHELINE_ALIGNED_DATA(cacheline) \ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 40d34e032d5b..0fcacb909778 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -112,6 +112,19 @@ #define DEFINE_PER_CPU(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "") +/* + * Declaration/definition used for per-CPU variables that are frequently + * accessed and should be in a single cacheline. + * + * For use only by architecture and core code. Only use scalar or pointer + * types to maximize density. + */ +#define DECLARE_PER_CPU_CACHE_HOT(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name) + +#define DEFINE_PER_CPU_CACHE_HOT(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name) + /* * Declaration/definition used for per-CPU variables that must be cacheline * aligned under SMP conditions so that, whilst a particular instance of the -- 2.51.0 From 972f9cdff924ca53715019b63b4d4aed69d23b1e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:37 -0500 Subject: [PATCH 10/16] x86/percpu: Move pcpu_hot to percpu hot section Also change the alignment of the percpu hot section: - PERCPU_SECTION(INTERNODE_CACHE_BYTES) + PERCPU_SECTION(L1_CACHE_BYTES) As vSMP will muck with INTERNODE_CACHE_BYTES, which invalidates the too-large-section assert we do: ASSERT(__per_cpu_hot_end - __per_cpu_hot_start <= 64, "percpu cache hot section too large") [ mingo: Added INTERNODE_CACHE_BYTES fix & explanation. ] Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-3-brgerst@gmail.com --- arch/x86/include/asm/current.h | 28 +++++++++++----------------- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/vmlinux.lds.S | 5 ++++- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index bf5953883ec3..60bc66edca83 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -13,32 +13,26 @@ struct task_struct; struct pcpu_hot { - union { - struct { - struct task_struct *current_task; - int preempt_count; - int cpu_number; + struct task_struct *current_task; + int preempt_count; + int cpu_number; #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING - u64 call_depth; + u64 call_depth; #endif - unsigned long top_of_stack; - void *hardirq_stack_ptr; - u16 softirq_pending; + unsigned long top_of_stack; + void *hardirq_stack_ptr; + u16 softirq_pending; #ifdef CONFIG_X86_64 - bool hardirq_stack_inuse; + bool hardirq_stack_inuse; #else - void *softirq_stack_ptr; + void *softirq_stack_ptr; #endif - }; - u8 pad[64]; - }; }; -static_assert(sizeof(struct pcpu_hot) == 64); -DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); /* const-qualified alias to pcpu_hot, aliased by linker.
*/ -DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override, +DECLARE_PER_CPU_CACHE_HOT(const struct pcpu_hot __percpu_seg_override, const_pcpu_hot); static __always_inline struct task_struct *get_current(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 88a6707b765c..f00870b2c980 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2064,7 +2064,7 @@ static __init int setup_setcpuid(char *arg) } __setup("setcpuid=", setup_setcpuid); -DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { +DEFINE_PER_CPU_CACHE_HOT(struct pcpu_hot, pcpu_hot) = { .current_task = &init_task, .preempt_count = INIT_PREEMPT_COUNT, .top_of_stack = TOP_OF_INIT_STACK, diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1769a7126224..0ef9870ea52e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -187,6 +187,8 @@ SECTIONS PAGE_ALIGNED_DATA(PAGE_SIZE) + CACHE_HOT_DATA(L1_CACHE_BYTES) + CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) DATA_DATA @@ -327,7 +329,8 @@ SECTIONS EXIT_DATA } - PERCPU_SECTION(INTERNODE_CACHE_BYTES) + PERCPU_SECTION(L1_CACHE_BYTES) + ASSERT(__per_cpu_hot_end - __per_cpu_hot_start <= 64, "percpu cache hot section too large") RUNTIME_CONST_VARIABLES RUNTIME_CONST(ptr, USER_PTR_MAX) -- 2.51.0 From 46e8fff6d45fe44cb0939c9339b6d5936551b1e4 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:38 -0500 Subject: [PATCH 11/16] x86/preempt: Move preempt count to percpu hot section No functional change. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-4-brgerst@gmail.com --- arch/x86/include/asm/current.h | 1 - arch/x86/include/asm/preempt.h | 25 +++++++++++++------------ arch/x86/kernel/cpu/common.c | 4 +++- include/linux/preempt.h | 1 + 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 60bc66edca83..46a736d6f2ec 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -14,7 +14,6 @@ struct task_struct; struct pcpu_hot { struct task_struct *current_task; - int preempt_count; int cpu_number; #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING u64 call_depth; diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 919909d8cb77..578441db09f0 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -4,10 +4,11 @@ #include #include -#include #include +DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count); + /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -23,18 +24,18 @@ */ static __always_inline int preempt_count(void) { - return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED; + return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) { int old, new; - old = raw_cpu_read_4(pcpu_hot.preempt_count); + old = raw_cpu_read_4(__preempt_count); do { new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); - } while (!raw_cpu_try_cmpxchg_4(pcpu_hot.preempt_count, &old, new)); + } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new)); } /* @@ -43,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \ + 
per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* @@ -57,17 +58,17 @@ static __always_inline void preempt_count_set(int pc) static __always_inline void set_preempt_need_resched(void) { - raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED); + raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { - raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED); + raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { - return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED); + return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); } /* @@ -76,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void) static __always_inline void __preempt_count_add(int val) { - raw_cpu_add_4(pcpu_hot.preempt_count, val); + raw_cpu_add_4(__preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { - raw_cpu_add_4(pcpu_hot.preempt_count, -val); + raw_cpu_add_4(__preempt_count, -val); } /* @@ -91,7 +92,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e, + return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e, __percpu_arg([var])); } @@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline bool should_resched(int preempt_offset) { - return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPTION diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f00870b2c980..a9d61537d4a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2066,12 +2066,14 @@ __setup("setcpuid=", setup_setcpuid); DEFINE_PER_CPU_CACHE_HOT(struct pcpu_hot, pcpu_hot) = { .current_task = &init_task, - .preempt_count = INIT_PREEMPT_COUNT, .top_of_stack = TOP_OF_INIT_STACK, }; EXPORT_PER_CPU_SYMBOL(pcpu_hot); EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); +DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); + #ifdef CONFIG_X86_64 static void wrmsrl_cstar(unsigned long val) { diff --git a/include/linux/preempt.h b/include/linux/preempt.h index ca86235ac15c..4c1af9b7e28b 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -319,6 +319,7 @@ do { \ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; +struct task_struct; /** * preempt_ops - notifiers called when a task is preempted and rescheduled -- 2.51.0 From 01c7bc5198e9ef5628fad0346c5cdba2633a79da Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:39 -0500 Subject: [PATCH 12/16] x86/smp: Move cpu number to percpu hot section No functional change. 
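This and the following conversion patches share the same shape; a minimal sketch of the pattern, with example_var as a hypothetical placeholder name:

	/* declaration, in a header: */
	DECLARE_PER_CPU_CACHE_HOT(int, example_var);

	/* definition, in exactly one translation unit: */
	DEFINE_PER_CPU_CACHE_HOT(int, example_var);
	EXPORT_PER_CPU_SYMBOL(example_var);

Accessors such as this_cpu_read(example_var) keep their semantics; only the section the variable lands in changes.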
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-5-brgerst@gmail.com --- arch/x86/include/asm/current.h | 1 - arch/x86/include/asm/smp.h | 7 ++++--- arch/x86/kernel/setup_percpu.c | 5 ++++- kernel/bpf/verifier.c | 4 ++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 46a736d6f2ec..f988462d8b69 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -14,7 +14,6 @@ struct task_struct; struct pcpu_hot { struct task_struct *current_task; - int cpu_number; #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING u64 call_depth; #endif diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 76d7c013cd08..bcfa00232d79 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -6,7 +6,8 @@ #include #include -#include + +DECLARE_PER_CPU_CACHE_HOT(int, cpu_number); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); @@ -132,8 +133,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. */ -#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number) -#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number) +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) static inline struct cpumask *cpu_llc_shared_mask(int cpu) { diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1e7be9409aa2..175afc3ffb12 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -23,6 +23,9 @@ #include #include +DEFINE_PER_CPU_CACHE_HOT(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); + DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); @@ -161,7 +164,7 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); - per_cpu(pcpu_hot.cpu_number, cpu) = cpu; + per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* * Copy data used in early init routines from the diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f4859516b190..6e604caa870c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21702,12 +21702,12 @@ patch_map_ops_generic: if (insn->imm == BPF_FUNC_get_smp_processor_id && verifier_inlines_helper_call(env, insn->imm)) { /* BPF_FUNC_get_smp_processor_id inlining is an - * optimization, so if pcpu_hot.cpu_number is ever + * optimization, so if cpu_number is ever * changed in some incompatible and hard to support * way, it's fine to back out this inlining logic */ #ifdef CONFIG_SMP - insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number); + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number); insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0); cnt = 3; -- 2.51.0 From 839be1619fb897364b782997bc2c5d072d7a79f2 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:40 -0500 Subject: [PATCH 13/16] x86/retbleed: Move call depth to percpu hot section No functional change. 
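A C-level sketch of what the standalone symbol buys (the function name below is hypothetical; the asm lines in the comment are quoted from the hunks that follow): a plain percpu variable can be addressed by name, so the X86_call_depth asm-offsets constant becomes unnecessary:

	DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);

	static __always_inline void credit_call_depth_sketch(void)
	{
		/* before: movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth)
		 * after:  movq $-1, PER_CPU_VAR(__x86_call_depth)
		 */
		raw_cpu_write(__x86_call_depth, (u64)-1);
	}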
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-6-brgerst@gmail.com --- arch/x86/include/asm/current.h | 3 --- arch/x86/include/asm/nospec-branch.h | 11 ++++++----- arch/x86/kernel/asm-offsets.c | 3 --- arch/x86/kernel/cpu/common.c | 8 ++++++++ arch/x86/lib/retpoline.S | 2 +- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index f988462d8b69..8ba2c0f8bcaf 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -14,9 +14,6 @@ struct task_struct; struct pcpu_hot { struct task_struct *current_task; -#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING - u64 call_depth; -#endif unsigned long top_of_stack; void *hardirq_stack_ptr; u16 softirq_pending; diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index aee26bb8230f..44c6076fd22b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -12,7 +12,6 @@ #include #include #include -#include /* * Call depth tracking for Intel SKL CPUs to address the RSB underflow @@ -78,21 +77,21 @@ #include #define CREDIT_CALL_DEPTH \ - movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); + movq $-1, PER_CPU_VAR(__x86_call_depth); #define RESET_CALL_DEPTH \ xor %eax, %eax; \ bts $63, %rax; \ - movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); + movq %rax, PER_CPU_VAR(__x86_call_depth); #define RESET_CALL_DEPTH_FROM_CALL \ movb $0xfc, %al; \ shl $56, %rax; \ - movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + movq %rax, PER_CPU_VAR(__x86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #define INCREMENT_CALL_DEPTH \ - sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + sarq $5, PER_CPU_VAR(__x86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #else @@ -387,6 +386,8 @@ extern void call_depth_return_thunk(void); __stringify(INCREMENT_CALL_DEPTH), \ X86_FEATURE_CALL_DEPTH) +DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); + #ifdef CONFIG_CALL_THUNKS_DEBUG DECLARE_PER_CPU(u64, __x86_call_count); DECLARE_PER_CPU(u64, __x86_ret_count); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index a98020bf31bb..6fae88f8ae1e 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -109,9 +109,6 @@ static void __used common(void) OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); OFFSET(X86_current_task, pcpu_hot, current_task); -#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING - OFFSET(X86_call_depth, pcpu_hot, call_depth); -#endif #if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) /* Offset for fields in aria_ctx */ BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a9d61537d4a6..fd224ae57d62 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2075,6 +2075,14 @@ DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); #ifdef CONFIG_X86_64 +/* + * Note: Do not make this dependent on CONFIG_MITIGATION_CALL_DEPTH_TRACKING + * so that this space is reserved in the hot cache section even when the + * mitigation is disabled.
+ */ +DEFINE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); +EXPORT_PER_CPU_SYMBOL(__x86_call_depth); + static void wrmsrl_cstar(unsigned long val) { /* diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 038f49a43ab4..a26c43abd47d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -343,7 +343,7 @@ SYM_FUNC_START(call_depth_return_thunk) * case. */ CALL_THUNKS_DEBUG_INC_RETS - shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) + shlq $5, PER_CPU_VAR(__x86_call_depth) jz 1f ANNOTATE_UNRET_SAFE ret -- 2.51.0 From c8f1ac2bd7771a3bc536f897c142ca219cac13e1 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:41 -0500 Subject: [PATCH 14/16] x86/softirq: Move softirq_pending to percpu hot section No functional change. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-7-brgerst@gmail.com --- arch/x86/include/asm/current.h | 1 - arch/x86/include/asm/hardirq.h | 4 ++-- arch/x86/kernel/irq.c | 3 +++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 8ba2c0f8bcaf..f153c77853de 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -16,7 +16,6 @@ struct pcpu_hot { struct task_struct *current_task; unsigned long top_of_stack; void *hardirq_stack_ptr; - u16 softirq_pending; #ifdef CONFIG_X86_64 bool hardirq_stack_inuse; #else diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 6ffa8b75f4cd..f00c09ffe6a9 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -3,7 +3,6 @@ #define _ASM_X86_HARDIRQ_H #include -#include typedef struct { #if IS_ENABLED(CONFIG_KVM_INTEL) @@ -66,7 +65,8 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat -#define local_softirq_pending_ref pcpu_hot.softirq_pending +DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending); +#define local_softirq_pending_ref __softirq_pending #if IS_ENABLED(CONFIG_KVM_INTEL) /* diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index feca4f20b06a..83a5252a473c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -33,6 +33,9 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); +DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); +EXPORT_PER_CPU_SYMBOL(__softirq_pending); + atomic_t irq_err_count; /* -- 2.51.0 From c6a0918072eaa97df268c00c05822ea982a321b6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:42 -0500 Subject: [PATCH 15/16] x86/irq: Move irq stacks to percpu hot section No functional change. 
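As in the previous patches, only the variable reference changes; a sketch of the access pattern, taken from the hunks below:

	/* before */
	irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr);
	/* after */
	irqstk = __this_cpu_read(hardirq_stack_ptr);

Note the word-size split in the new declarations: hardirq_stack_inuse exists only on 64-bit, softirq_stack_ptr only on 32-bit.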
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-8-brgerst@gmail.com --- arch/x86/include/asm/current.h | 6 ------ arch/x86/include/asm/irq_stack.h | 12 ++++++------ arch/x86/include/asm/processor.h | 7 +++++++ arch/x86/kernel/dumpstack_32.c | 4 ++-- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/irq.c | 2 ++ arch/x86/kernel/irq_32.c | 12 +++++++----- arch/x86/kernel/irq_64.c | 7 ++++--- arch/x86/kernel/process_64.c | 2 +- 9 files changed, 30 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index f153c77853de..6fad5a4c21d7 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -15,12 +15,6 @@ struct task_struct; struct pcpu_hot { struct task_struct *current_task; unsigned long top_of_stack; - void *hardirq_stack_ptr; -#ifdef CONFIG_X86_64 - bool hardirq_stack_inuse; -#else - void *softirq_stack_ptr; -#endif }; DECLARE_PER_CPU_CACHE_HOT(struct pcpu_hot, pcpu_hot); diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 562a547c29a5..735c3a491f60 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -116,7 +116,7 @@ ASM_CALL_ARG2 #define call_on_irqstack(func, asm_call, argconstr...) \ - call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \ + call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ func, asm_call, argconstr) /* Macros to assert type correctness for run_*_on_irqstack macros */ @@ -135,7 +135,7 @@ * User mode entry and interrupt on the irq stack do not \ * switch stacks. If from user mode the task stack is empty. \ */ \ - if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \ + if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ irq_enter_rcu(); \ func(c_args); \ irq_exit_rcu(); \ @@ -146,9 +146,9 @@ * places. Invoke the stack switch macro with the call \ * sequence which matches the above direct invocation. 
\ */ \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ + __this_cpu_write(hardirq_stack_inuse, true); \ call_on_irqstack(func, asm_call, constr); \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ + __this_cpu_write(hardirq_stack_inuse, false); \ } \ } @@ -212,9 +212,9 @@ */ #define do_softirq_own_stack() \ { \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ + __this_cpu_write(hardirq_stack_inuse, true); \ call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ + __this_cpu_write(hardirq_stack_inuse, false); \ } #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c241dbc1562c..6bb6af0b5430 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -415,6 +415,13 @@ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); +#else +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); +#endif + #ifdef CONFIG_X86_64 static inline unsigned long cpu_kernelmode_gs_base(int cpu) { diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b4905d5173fd..722fd712e1cf 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type) static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* @@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index f05339fee778..6c5defd6569a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); + unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *begin; /* diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 83a5252a473c..81f9b78e0f7b 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,6 +36,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); EXPORT_PER_CPU_SYMBOL(__softirq_pending); +DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); + atomic_t irq_err_count; /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d301208d35d0..c7a5d2960d57 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -49,6 +49,8 @@ static inline bool check_stack_overflow(void) { return false; } static inline void print_stack_overflow(void) { } #endif +DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); + static void call_on_stack(void *func, void *stack) { 
asm volatile("xchgl %[sp], %%esp\n" @@ -70,7 +72,7 @@ static inline bool execute_on_irq_stack(bool overflow, struct irq_desc *desc) u32 *isp, *prev_esp; curstk = (struct irq_stack *) current_stack(); - irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr); + irqstk = __this_cpu_read(hardirq_stack_ptr); /* * this is where we switch to the IRQ stack. However, if we are @@ -107,7 +109,7 @@ int irq_init_percpu_irqstack(unsigned int cpu) int node = cpu_to_node(cpu); struct page *ph, *ps; - if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) + if (per_cpu(hardirq_stack_ptr, cpu)) return 0; ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); @@ -119,8 +121,8 @@ int irq_init_percpu_irqstack(unsigned int cpu) return -ENOMEM; } - per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph); - per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps); + per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); + per_cpu(softirq_stack_ptr, cpu) = page_address(ps); return 0; } @@ -130,7 +132,7 @@ void do_softirq_own_stack(void) struct irq_stack *irqstk; u32 *isp, *prev_esp; - irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr); + irqstk = __this_cpu_read(softirq_stack_ptr); /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 56bdeecd8ee0..ca78dce39361 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,6 +26,7 @@ #include #include +DEFINE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; #ifdef CONFIG_VMAP_STACK @@ -50,7 +51,7 @@ static int map_irq_stack(unsigned int cpu) return -ENOMEM; /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #else @@ -63,14 +64,14 @@ static int map_irq_stack(unsigned int cpu) void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #endif int irq_init_percpu_irqstack(unsigned int cpu) { - if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) + if (per_cpu(hardirq_stack_ptr, cpu)) return 0; return map_irq_stack(cpu); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f983d2a57ac3..2f38416deb74 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -614,7 +614,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && - this_cpu_read(pcpu_hot.hardirq_stack_inuse)); + this_cpu_read(hardirq_stack_inuse)); if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD)) switch_fpu_prepare(prev_p, cpu); -- 2.51.0 From 385f72c83eb609652f02dc9ee415520c23bda629 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 3 Mar 2025 11:52:43 -0500 Subject: [PATCH 16/16] x86/percpu: Move top_of_stack to percpu hot section No functional change. 
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Acked-by: Uros Bizjak Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250303165246.2175811-9-brgerst@gmail.com --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/entry/entry_64.S | 6 +++--- arch/x86/entry/entry_64_compat.S | 4 ++-- arch/x86/include/asm/current.h | 1 - arch/x86/include/asm/percpu.h | 2 +- arch/x86/include/asm/processor.h | 9 +++++++-- arch/x86/kernel/asm-offsets.c | 1 - arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kernel/process_32.c | 4 ++-- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/vmlinux.lds.S | 1 + 12 files changed, 22 insertions(+), 17 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 20be5758c2d2..92c0b4a94e0a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1153,7 +1153,7 @@ SYM_CODE_START(asm_exc_nmi) * is using the thread stack right now, so it's safe for us to use it. */ movl %esp, %ebx - movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp + movl PER_CPU_VAR(cpu_current_top_of_stack), %esp call exc_nmi movl %ebx, %esp @@ -1217,7 +1217,7 @@ SYM_CODE_START(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp - movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp call make_task_dead diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 49d3b222fe99..f40bdf97d390 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64) /* tss.sp2 is scratch space. */ movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -1168,7 +1168,7 @@ SYM_CODE_START(asm_exc_nmi) FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ @@ -1486,7 +1486,7 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. 
*/ xorl %ebp, %ebp - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax + movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index ed0a5f2dc129..a45e1125fc6c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -57,7 +57,7 @@ SYM_CODE_START(entry_SYSENTER_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rax popq %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -193,7 +193,7 @@ SYM_CODE_START(entry_SYSCALL_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp /* Switch to the kernel stack */ - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 6fad5a4c21d7..3d1b123c2ee3 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -14,7 +14,6 @@ struct task_struct; struct pcpu_hot { struct task_struct *current_task; - unsigned long top_of_stack; }; DECLARE_PER_CPU_CACHE_HOT(struct pcpu_hot, pcpu_hot); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 8a8cf86dded3..41517a7f7f1c 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -551,7 +551,7 @@ do { \ * it is accessed while this_cpu_read_stable() allows the value to be cached. * this_cpu_read_stable() is more efficient and can be used if its value * is guaranteed to be valid across CPUs. The current users include - * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are + * pcpu_hot.current_task and cpu_current_top_of_stack, both of which are * actually per-thread variables implemented as per-CPU variables and * thus stable for the duration of the respective task. */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6bb6af0b5430..7a3918308a36 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -422,6 +422,11 @@ DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); #endif +DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack); +/* const-qualified alias provided by the linker. */ +DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override, + const_cpu_current_top_of_stack); + #ifdef CONFIG_X86_64 static inline unsigned long cpu_kernelmode_gs_base(int cpu) { @@ -547,9 +552,9 @@ static __always_inline unsigned long current_top_of_stack(void) * entry trampoline. 
*/ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) - return this_cpu_read_const(const_pcpu_hot.top_of_stack); + return this_cpu_read_const(const_cpu_current_top_of_stack); - return this_cpu_read_stable(pcpu_hot.top_of_stack); + return this_cpu_read_stable(cpu_current_top_of_stack); } static __always_inline bool on_thread_stack(void) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 6fae88f8ae1e..54ace808defd 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -107,7 +107,6 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); - OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); OFFSET(X86_current_task, pcpu_hot, current_task); #if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) /* Offset for fields in aria_ctx */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fd224ae57d62..51653e01a716 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2066,7 +2066,6 @@ __setup("setcpuid=", setup_setcpuid); DEFINE_PER_CPU_CACHE_HOT(struct pcpu_hot, pcpu_hot) = { .current_task = &init_task, - .top_of_stack = TOP_OF_INIT_STACK, }; EXPORT_PER_CPU_SYMBOL(pcpu_hot); EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); @@ -2074,6 +2073,8 @@ EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); +DEFINE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; + #ifdef CONFIG_X86_64 /* * Note: Do not make this dependant on CONFIG_MITIGATION_CALL_DEPTH_TRACKING diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2bdab416298c..8ec44acb863b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -190,13 +190,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0 and pcpu_hot.top_of_stack. This changes + * Reload esp0 and cpu_current_top_of_stack. This changes * current_thread_info(). Refresh the SYSENTER configuration in * case prev or next is vm86. */ update_task_stack(next_p); refresh_sysenter_cs(next); - this_cpu_write(pcpu_hot.top_of_stack, + this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 2f38416deb74..d8f4bcef8ee4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -669,7 +669,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch the PDA and FPU contexts. 
*/ raw_cpu_write(pcpu_hot.current_task, next_p); - raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); + raw_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); switch_fpu_finish(next_p); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c5aabddf5573..c3a26e60e3c4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -832,7 +832,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); #endif return 0; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0ef9870ea52e..475f6717f27a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -44,6 +44,7 @@ ENTRY(phys_startup_64) jiffies = jiffies_64; const_pcpu_hot = pcpu_hot; +const_cpu_current_top_of_stack = cpu_current_top_of_stack; #if defined(CONFIG_X86_64) /* -- 2.51.0
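After this series, struct pcpu_hot is reduced to current_task, and the hot section is available to generic code. A hypothetical user, with invented names for illustration, could look like:

	DEFINE_PER_CPU_CACHE_HOT(unsigned long, demo_hot_count);

	static inline void demo_hot_bump(void)
	{
		/* a single %gs-relative increment on x86 */
		this_cpu_inc(demo_hot_count);
	}

The ASSERT added in patch 10 keeps such additions honest: the whole hot section must stay within 64 bytes.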