From bd72baff229920da1d57c14364c11ecdbaf5b458 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Tue, 4 Mar 2025 17:33:42 +0200
Subject: [PATCH 01/16] x86/runtime-const: Add the RUNTIME_CONST_PTR assembly
 macro

Add an assembly macro to refer to a runtime constant. It hides the
linker magic and makes assembly more readable.

Signed-off-by: Kirill A. Shutemov
Signed-off-by: Ingo Molnar
Cc: Brian Gerst
Cc: H. Peter Anvin
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20250304153342.2016569-1-kirill.shutemov@linux.intel.com
---
 arch/x86/include/asm/runtime-const.h | 13 +++++++++++++
 arch/x86/lib/getuser.S               |  7 ++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index 6652ebddfd02..8d983cfd06ea 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -2,6 +2,18 @@
 #ifndef _ASM_RUNTIME_CONST_H
 #define _ASM_RUNTIME_CONST_H
 
+#ifdef __ASSEMBLY__
+
+.macro RUNTIME_CONST_PTR sym reg
+	movq	$0x0123456789abcdef, %\reg
+1:
+	.pushsection runtime_ptr_\sym, "a"
+	.long	1b - 8 - .
+	.popsection
+.endm
+
+#else /* __ASSEMBLY__ */
+
 #define runtime_const_ptr(sym) ({				\
 	typeof(sym) __ret;					\
 	asm_inline("mov %1,%0\n1:\n"				\
@@ -58,4 +70,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
 	}
 }
 
+#endif /* __ASSEMBLY__ */
 #endif

diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 71d8e7da4fc5..9d5654b8a72a 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -35,16 +35,13 @@
 #include
 #include
 #include
+#include <asm/runtime-const.h>
 
 #define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
 
 .macro check_range size:req
 .if IS_ENABLED(CONFIG_X86_64)
-	movq $0x0123456789abcdef,%rdx
-1:
-	.pushsection runtime_ptr_USER_PTR_MAX,"a"
-	.long 1b - 8 - .
-	.popsection
+	RUNTIME_CONST_PTR USER_PTR_MAX, rdx
 	cmp %rdx, %rax
 	cmova %rdx, %rax
 .else
-- 
2.51.0

From a9deda695972cb53fe815e175b7d66757964764e Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Thu, 6 Mar 2025 15:52:11 +0100
Subject: [PATCH 02/16] x86/kexec: Merge x86_32 and x86_64 code using macros
 from <asm/asm.h>

Merge common x86_32 and x86_64 code in crash_setup_regs() using
macros from <asm/asm.h>.

The compiled object files before and after the patch are unchanged.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: David Woodhouse
Cc: Baoquan He
Cc: Vivek Goyal
Cc: Dave Young
Cc: Ard Biesheuvel
Cc: "H.
Peter Anvin" Link: https://lore.kernel.org/r/20250306145227.55819-1-ubizjak@gmail.com --- arch/x86/include/asm/kexec.h | 58 ++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 8ad187462b68..e3589d6aec24 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -71,41 +72,32 @@ static inline void crash_setup_regs(struct pt_regs *newregs, if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); } else { + asm volatile("mov %%" _ASM_BX ",%0" : "=m"(newregs->bx)); + asm volatile("mov %%" _ASM_CX ",%0" : "=m"(newregs->cx)); + asm volatile("mov %%" _ASM_DX ",%0" : "=m"(newregs->dx)); + asm volatile("mov %%" _ASM_SI ",%0" : "=m"(newregs->si)); + asm volatile("mov %%" _ASM_DI ",%0" : "=m"(newregs->di)); + asm volatile("mov %%" _ASM_BP ",%0" : "=m"(newregs->bp)); + asm volatile("mov %%" _ASM_AX ",%0" : "=m"(newregs->ax)); + asm volatile("mov %%" _ASM_SP ",%0" : "=m"(newregs->sp)); +#ifdef CONFIG_X86_64 + asm volatile("mov %%r8,%0" : "=m"(newregs->r8)); + asm volatile("mov %%r9,%0" : "=m"(newregs->r9)); + asm volatile("mov %%r10,%0" : "=m"(newregs->r10)); + asm volatile("mov %%r11,%0" : "=m"(newregs->r11)); + asm volatile("mov %%r12,%0" : "=m"(newregs->r12)); + asm volatile("mov %%r13,%0" : "=m"(newregs->r13)); + asm volatile("mov %%r14,%0" : "=m"(newregs->r14)); + asm volatile("mov %%r15,%0" : "=m"(newregs->r15)); +#endif + asm volatile("mov %%ss,%k0" : "=a"(newregs->ss)); + asm volatile("mov %%cs,%k0" : "=a"(newregs->cs)); #ifdef CONFIG_X86_32 - asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); - asm volatile("movl %%ecx,%0" : "=m"(newregs->cx)); - asm volatile("movl %%edx,%0" : "=m"(newregs->dx)); - asm volatile("movl %%esi,%0" : "=m"(newregs->si)); - asm volatile("movl %%edi,%0" : "=m"(newregs->di)); - asm volatile("movl %%ebp,%0" : "=m"(newregs->bp)); - asm volatile("movl %%eax,%0" : "=m"(newregs->ax)); - asm volatile("movl %%esp,%0" : "=m"(newregs->sp)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds)); - asm volatile("movl %%es, %%eax;" :"=a"(newregs->es)); - asm volatile("pushfl; popl %0" :"=m"(newregs->flags)); -#else - asm volatile("movq %%rbx,%0" : "=m"(newregs->bx)); - asm volatile("movq %%rcx,%0" : "=m"(newregs->cx)); - asm volatile("movq %%rdx,%0" : "=m"(newregs->dx)); - asm volatile("movq %%rsi,%0" : "=m"(newregs->si)); - asm volatile("movq %%rdi,%0" : "=m"(newregs->di)); - asm volatile("movq %%rbp,%0" : "=m"(newregs->bp)); - asm volatile("movq %%rax,%0" : "=m"(newregs->ax)); - asm volatile("movq %%rsp,%0" : "=m"(newregs->sp)); - asm volatile("movq %%r8,%0" : "=m"(newregs->r8)); - asm volatile("movq %%r9,%0" : "=m"(newregs->r9)); - asm volatile("movq %%r10,%0" : "=m"(newregs->r10)); - asm volatile("movq %%r11,%0" : "=m"(newregs->r11)); - asm volatile("movq %%r12,%0" : "=m"(newregs->r12)); - asm volatile("movq %%r13,%0" : "=m"(newregs->r13)); - asm volatile("movq %%r14,%0" : "=m"(newregs->r14)); - asm volatile("movq %%r15,%0" : "=m"(newregs->r15)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); + asm volatile("mov %%ds,%k0" : "=a"(newregs->ds)); + asm volatile("mov %%es,%k0" : "=a"(newregs->es)); #endif + asm volatile("pushf\n\t" + "pop %0" : 
"=m"(newregs->flags)); newregs->ip = _THIS_IP_; } } -- 2.51.0 From 3f5dbafc2d4651020f45309ca85120b6a8162fd9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 12 Mar 2025 11:27:41 +0100 Subject: [PATCH 03/16] x86/head/64: Avoid Clang < 17 stack protector in startup code Clang versions before 17 will not honour -fdirect-access-external-data for the load of the stack cookie emitted into each function's prologue and epilogue, and will emit a GOT based reference instead, e.g., 4c 8b 2d 00 00 00 00 mov 0x0(%rip),%r13 18a: R_X86_64_REX_GOTPCRELX __ref_stack_chk_guard-0x4 65 49 8b 45 00 mov %gs:0x0(%r13),%rax This is inefficient, but at least, the linker will usually follow the rules of the x86 psABI, and relax the GOT load into a RIP-relative LEA instruction. This is still suboptimal, as the per-CPU load could use a RIP-relative reference directly, but at least it gets rid of the first load from memory. However, Boris reports that in some cases, when using distro builds of Clang/LLD 15, the first load gets relaxed into 49 c7 c6 20 c0 55 86 mov $0xffffffff8655c020,%r14 ffffffff8373bf0f: R_X86_64_32S __ref_stack_chk_guard 65 49 8b 06 mov %gs:(%r14),%rax instead, which is fine in principle, as MOV may be cheaper than LEA on some micro-architectures. However, such absolute references assume that the variable in question can be accessed via the kernel virtual mapping, and this is not guaranteed for the startup code residing in .head.text. This is therefore a true positive, that was caught using the recently introduced relocs check for absolute references in the startup code: Absolute reference to symbol '__ref_stack_chk_guard' not permitted in .head.text Work around the issue by disabling the stack protector in the startup code for Clang versions older than 17. Fixes: 80d47defddc0 ("x86/stackprotector/64: Convert to normal per-CPU variable") Reported-by: Borislav Petkov Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250312102740.602870-2-ardb+git@google.com --- arch/x86/include/asm/init.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 0e82ebc5d1e1..8b1b1abcef15 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -2,7 +2,11 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H +#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000 +#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector +#else #define __head __section(".head.text") __no_sanitize_undefined +#endif struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ -- 2.51.0 From 91d5451d97ce35cbd510277fa3b7abf9caa4e34d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Mar 2025 12:48:49 +0100 Subject: [PATCH 04/16] x86/stackprotector/64: Only export __ref_stack_chk_guard on CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The __ref_stack_chk_guard symbol doesn't exist on UP: :4:15: error: ‘__ref_stack_chk_guard’ undeclared here (not in a function) Fix the #ifdef around the entry.S export. 
Signed-off-by: Ingo Molnar Cc: Brian Gerst Cc: Ard Biesheuvel Cc: Uros Bizjak Link: https://lore.kernel.org/r/20250123190747.745588-8-brgerst@gmail.com --- arch/x86/entry/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 088f91f76edb..d3caa31240ed 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -64,6 +64,6 @@ THUNK warn_thunk_thunk, __warn_thunk * entirely in the C code, and use an alias emitted by the linker script * instead. */ -#ifdef CONFIG_STACKPROTECTOR +#if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP) EXPORT_SYMBOL(__ref_stack_chk_guard); #endif -- 2.51.0 From 72899899e4f9de0b545218e66bf14cfa2579f2f8 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 12 Mar 2025 13:38:43 +0100 Subject: [PATCH 05/16] x86/hweight: Use named operands in inline asm() No functional change intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: "H. Peter Anvin" Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250312123905.149298-1-ubizjak@gmail.com --- arch/x86/include/asm/arch_hweight.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index ba88edd0d58b..a11bb841c434 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -16,9 +16,9 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm (ALTERNATIVE("call __sw_hweight32", "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res) + : [val] REG_IN (w)); return res; } @@ -44,9 +44,9 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm (ALTERNATIVE("call __sw_hweight64", "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res) + : [val] REG_IN (w)); return res; } -- 2.51.0 From 194a613088a8c9dae300dfb08433287cee803e8d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 12 Mar 2025 13:38:44 +0100 Subject: [PATCH 06/16] x86/hweight: Use ASM_CALL_CONSTRAINT in inline asm() Use ASM_CALL_CONSTRAINT to prevent inline asm() that includes call instruction from being scheduled before the frame pointer gets set up by the containing function. This unconstrained scheduling might cause objtool to print a "call without frame pointer save/setup" warning. Current versions of compilers don't seem to trigger this condition, but without this constraint there's nothing to prevent the compiler from scheduling the insn in front of frame creation. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250312123905.149298-2-ubizjak@gmail.com --- arch/x86/include/asm/arch_hweight.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index a11bb841c434..f233eb00f41f 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -17,7 +17,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) unsigned int res; asm (ALTERNATIVE("call __sw_hweight32", "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) - : [cnt] "=" REG_OUT (res) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); return res; @@ -45,7 +45,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) unsigned long res; asm (ALTERNATIVE("call __sw_hweight64", "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) - : [cnt] "=" REG_OUT (res) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); return res; -- 2.51.0 From 21fe2514849bb4de05fbd098e311a87de6a62d4b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 12 Mar 2025 13:38:45 +0100 Subject: [PATCH 07/16] x86/hweight: Use asm_inline() instead of asm() Use asm_inline() to instruct the compiler that the size of asm() is the minimum size of one instruction, ignoring how many instructions the compiler thinks it is. ALTERNATIVE macro that expands to several pseudo directives causes instruction length estimate to count more than 20 instructions. bloat-o-meter reports slight reduction of the code size for x86_64 defconfig object file, compiled with gcc-14.2: add/remove: 6/12 grow/shrink: 59/50 up/down: 3389/-3560 (-171) Total: Before=22734393, After=22734222, chg -0.00% where 29 instances of code blocks involving POPCNT now gets inlined, resulting in the removal of several functions: format_is_yuv_semiplanar.part.isra 41 - -41 cdclk_divider 69 - -69 intel_joiner_adjust_timings 140 - -140 nl80211_send_wowlan_tcp_caps 369 - -369 nl80211_send_iftype_data 579 - -579 __do_sys_pidfd_send_signal 809 - -809 One noticeable change is: pcpu_page_first_chunk 1075 1060 -15 Where the compiler now inlines 4 more instances of POPCNT insns, but still manages to compile to a function with smaller code size. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250312123905.149298-3-ubizjak@gmail.com --- arch/x86/include/asm/arch_hweight.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index f233eb00f41f..b5982b94bdba 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -16,7 +16,8 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm (ALTERNATIVE("call __sw_hweight32", "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) + asm_inline (ALTERNATIVE("call __sw_hweight32", + "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); @@ -44,7 +45,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm (ALTERNATIVE("call __sw_hweight64", "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) + asm_inline (ALTERNATIVE("call __sw_hweight64", + "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); -- 2.51.0 From 53286632450835c49b5c177f97e4899645f15730 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 13 Mar 2025 11:26:55 +0100 Subject: [PATCH 08/16] x86/asm: Use CLFLUSHOPT and CLWB mnemonics in Current minimum required version of binutils is 2.25, which supports CLFLUSHOPT and CLWB instruction mnemonics. Replace the byte-wise specification of CLFLUSHOPT and CLWB with these proper mnemonics. No functional change intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250313102715.333142-1-ubizjak@gmail.com --- arch/x86/include/asm/special_insns.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 21ce480658b1..9b10bd102d3d 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -176,9 +176,8 @@ static __always_inline void clflush(volatile void *__p) static inline void clflushopt(volatile void *__p) { - alternative_io(".byte 0x3e; clflush %0", - ".byte 0x66; clflush %0", - X86_FEATURE_CLFLUSHOPT, + alternative_io("ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); } @@ -187,13 +186,10 @@ static inline void clwb(volatile void *__p) volatile struct { char x[64]; } *p = __p; asm volatile(ALTERNATIVE_2( - ".byte 0x3e; clflush (%[pax])", - ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ - X86_FEATURE_CLFLUSHOPT, - ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ - X86_FEATURE_CLWB) - : [p] "+m" (*p) - : [pax] "a" (p)); + "ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, + "clwb %0", X86_FEATURE_CLWB) + : "+m" (*p)); } #ifdef CONFIG_X86_USER_SHADOW_STACK -- 2.51.0 From f685a96bfd7963a587c76bd5709f2d9170820875 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 13 Mar 2025 11:26:56 +0100 Subject: [PATCH 09/16] x86/asm: Use asm_inline() instead of asm() in clwb() Use asm_inline() to instruct the compiler that the size of asm() is the minimum size of one instruction, ignoring how many instructions the compiler thinks it is. ALTERNATIVE macro that expands to several pseudo directives causes instruction length estimate to count more than 20 instructions. 
bloat-o-meter reports slight increase of the code size for x86_64 defconfig object file, compiled with gcc-14.2: add/remove: 0/2 grow/shrink: 3/0 up/down: 190/-59 (131) Function old new delta __copy_user_flushcache 166 247 +81 __memcpy_flushcache 369 437 +68 arch_wb_cache_pmem 6 47 +41 __pfx_clean_cache_range 16 - -16 clean_cache_range 43 - -43 Total: Before=22807167, After=22807298, chg +0.00% The compiler now inlines and removes the clean_cache_range() function. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250313102715.333142-2-ubizjak@gmail.com --- arch/x86/include/asm/special_insns.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 9b10bd102d3d..6266d6b9e0b8 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -185,7 +185,7 @@ static inline void clwb(volatile void *__p) { volatile struct { char x[64]; } *p = __p; - asm volatile(ALTERNATIVE_2( + asm_inline volatile(ALTERNATIVE_2( "ds clflush %0", "clflushopt %0", X86_FEATURE_CLFLUSHOPT, "clwb %0", X86_FEATURE_CLWB) -- 2.51.0 From faa6f77b0dfae95b7270084bb2cc7fe30d687761 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 9 Mar 2025 18:09:36 +0100 Subject: [PATCH 10/16] x86/locking/atomic: Improve performance by using asm_inline() for atomic locking instructions According to: https://gcc.gnu.org/onlinedocs/gcc/Size-of-an-asm.html the usage of asm pseudo directives in the asm template can confuse the compiler to wrongly estimate the size of the generated code. The LOCK_PREFIX macro expands to several asm pseudo directives, so its usage in atomic locking insns causes instruction length estimates to fail significantly (the specially instrumented compiler reports the estimated length of these asm templates to be 6 instructions long). This incorrect estimate further causes unoptimal inlining decisions, un-optimal instruction scheduling and un-optimal code block alignments for functions that use these locking primitives. Use asm_inline instead: https://gcc.gnu.org/pipermail/gcc-patches/2018-December/512349.html which is a feature that makes GCC pretend some inline assembler code is tiny (while it would think it is huge), instead of just asm. For code size estimation, the size of the asm is then taken as the minimum size of one instruction, ignoring how many instructions compiler thinks it is. 
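
[ Editor's note: the inflated estimate comes from what LOCK_PREFIX
  expands to on CONFIG_SMP builds. It is not just the one-byte 0xf0
  prefix: it also records the prefix address in the .smp_locks section
  so the prefix can be patched out on uniprocessor systems. Sketch
  paraphrased from arch/x86/include/asm/alternative.h:

	#define LOCK_PREFIX_HERE			\
		".pushsection .smp_locks,\"a\"\n"	\
		".balign 4\n"				\
		".long 671f - .\n" /* offset */		\
		".popsection\n"				\
		"671:"

	#define LOCK_PREFIX	LOCK_PREFIX_HERE "\n\tlock; "

  A plain asm() containing that expansion is costed at ~6 instructions,
  as noted above; asm_inline() pins the estimate at the minimum of
  one. ]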
bloat-o-meter reports the following code size increase (x86_64 defconfig, gcc-14.2.1): add/remove: 82/283 grow/shrink: 870/372 up/down: 76272/-43618 (32654) Total: Before=22770320, After=22802974, chg +0.14% with top grows (>500 bytes): Function old new delta ---------------------------------------------------------------- copy_process 6465 10191 +3726 balance_dirty_pages_ratelimited_flags 237 2949 +2712 icl_plane_update_noarm 5800 7969 +2169 samsung_input_mapping 3375 5170 +1795 ext4_do_update_inode.isra - 1526 +1526 __schedule 2416 3472 +1056 __i915_vma_resource_unhold - 946 +946 sched_mm_cid_after_execve 175 1097 +922 __do_sys_membarrier - 862 +862 filemap_fault 2666 3462 +796 nl80211_send_wiphy 11185 11874 +689 samsung_input_mapping.cold 900 1500 +600 virtio_gpu_queue_fenced_ctrl_buffer 839 1410 +571 ilk_update_pipe_csc 1201 1735 +534 enable_step - 525 +525 icl_color_commit_noarm 1334 1847 +513 tg3_read_bc_ver - 501 +501 and top shrinks (>500 bytes): Function old new delta ---------------------------------------------------------------- nl80211_send_iftype_data 580 - -580 samsung_gamepad_input_mapping.isra.cold 604 - -604 virtio_gpu_queue_ctrl_sgs 724 - -724 tg3_get_invariants 9218 8376 -842 __i915_vma_resource_unhold.part 899 - -899 ext4_mark_iloc_dirty 1735 106 -1629 samsung_gamepad_input_mapping.isra 2046 - -2046 icl_program_input_csc 2203 - -2203 copy_mm 2242 - -2242 balance_dirty_pages 2657 - -2657 These code size changes can be grouped into 4 groups: a) some functions now include once-called functions in full or in part. These are: Function old new delta ---------------------------------------------------------------- copy_process 6465 10191 +3726 balance_dirty_pages_ratelimited_flags 237 2949 +2712 icl_plane_update_noarm 5800 7969 +2169 samsung_input_mapping 3375 5170 +1795 ext4_do_update_inode.isra - 1526 +1526 that now include: Function old new delta ---------------------------------------------------------------- copy_mm 2242 - -2242 balance_dirty_pages 2657 - -2657 icl_program_input_csc 2203 - -2203 samsung_gamepad_input_mapping.isra 2046 - -2046 ext4_mark_iloc_dirty 1735 106 -1629 b) ISRA [interprocedural scalar replacement of aggregates, interprocedural pass that removes unused function return values (turning functions returning a value which is never used into void functions) and removes unused function parameters. It can also replace an aggregate parameter by a set of other parameters representing part of the original, turning those passed by reference into new ones which pass the value directly.] Top grows and shrinks of this group are listed below: Function old new delta ---------------------------------------------------------------- ext4_do_update_inode.isra - 1526 +1526 nfs4_begin_drain_session.isra - 249 +249 nfs4_end_drain_session.isra - 168 +168 __guc_action_register_multi_lrc_v70.isra 335 500 +165 __i915_gem_free_objects.isra - 144 +144 ... 
   membarrier_register_private_expedited.isra      108       -    -108
   syncobj_eventfd_entry_func.isra                 445     314    -131
   __ext4_sb_bread_gfp.isra                        140       -    -140
   class_preempt_notrace_destructor.isra           145       -    -145
   p9_fid_put.isra                                 151       -    -151
   __mm_cid_try_get.isra                           238       -    -238
   membarrier_global_expedited.isra                294       -    -294
   mm_cid_get.isra                                 295       -    -295
   samsung_gamepad_input_mapping.isra.cold         604       -    -604
   samsung_gamepad_input_mapping.isra             2046       -   -2046

c) different split points of the hot/cold split that just move code
   around. Top grows and shrinks of this group are listed below:

   Function                                        old     new   delta
   ----------------------------------------------------------------
   samsung_input_mapping.cold                      900    1500    +600
   __i915_request_reset.cold                       311     389     +78
   nfs_update_inode.cold                            77     153     +76
   __do_sys_swapon.cold                            404     455     +51
   copy_process.cold                                 -      45     +45
   tg3_get_invariants.cold                          73     115     +42
   ...
   hibernate.cold                                  671     643     -28
   copy_mm.cold                                     31       -     -31
   software_resume.cold                            249     207     -42
   io_poll_wake.cold                               106      54     -52
   samsung_gamepad_input_mapping.isra.cold         604       -    -604

d) full inlining of small functions with a locking insn (~150 cases).
   These bring in most of the code size increase, because the removed
   function code is now inlined in multiple places. E.g.:

   0000000000a50e10 :
     a50e10:  48 63 07              movslq (%rdi),%rax
     a50e13:  85 c0                 test   %eax,%eax
     a50e15:  7e 10                 jle    a50e27
     a50e17:  48 8b 4f 50           mov    0x50(%rdi),%rcx
     a50e1b:  f0 48 0f b3 41 50     lock btr %rax,0x50(%rcx)
     a50e21:  c7 07 ff ff ff ff     movl   $0xffffffff,(%rdi)
     a50e27:  e9 00 00 00 00        jmp    a50e2c
                      a50e28: R_X86_64_PLT32 __x86_return_thunk-0x4
     a50e2c:  0f 1f 40 00           nopl   0x0(%rax)

   is now fully inlined into the caller function. This is desirable due
   to the per-function overhead of CPU bug mitigations like retpolines.

FTR a) with -Os (where generated code size really matters) the x86_64
defconfig object file decreases by 24.388 kbytes, representing a 0.1%
code size decrease:

   text      data     bss     dec       hex      filename
   23883860  4617284  814212  29315356  1bf511c  vmlinux-old.o
   23859472  4615404  814212  29289088  1beea80  vmlinux-new.o

FTR b) clang recognizes "asm inline", but there was no difference in
code sizes:

   text      data     bss     dec       hex      filename
   27577163  4503078  807732  32887973  1f5d4a5  vmlinux-clang-patched.o
   27577181  4503078  807732  32887991  1f5d4b7  vmlinux-clang-unpatched.o

The performance impact of the patch was assessed by recompiling the
fedora-41 6.13.5 kernel and running lmbench with the old and the new
kernel.
The most noticeable improvements were: Process fork+exit: 270.0952 microseconds Process fork+execve: 2620.3333 microseconds Process fork+/bin/sh -c: 6781.0000 microseconds File /usr/tmp/XXX write bandwidth: 1780350 KB/sec Pagefaults on /usr/tmp/XXX: 0.3875 microseconds to: Process fork+exit: 298.6842 microseconds Process fork+execve: 1662.7500 microseconds Process fork+/bin/sh -c: 2127.6667 microseconds File /usr/tmp/XXX write bandwidth: 1950077 KB/sec Pagefaults on /usr/tmp/XXX: 0.1958 microseconds and from: Socket bandwidth using localhost 0.000001 2.52 MB/sec 0.000064 163.02 MB/sec 0.000128 321.70 MB/sec 0.000256 630.06 MB/sec 0.000512 1207.07 MB/sec 0.001024 2004.06 MB/sec 0.001437 2475.43 MB/sec 10.000000 5817.34 MB/sec Avg xfer: 3.2KB, 41.8KB in 1.2230 millisecs, 34.15 MB/sec AF_UNIX sock stream bandwidth: 9850.01 MB/sec Pipe bandwidth: 4631.28 MB/sec to: Socket bandwidth using localhost 0.000001 3.13 MB/sec 0.000064 187.08 MB/sec 0.000128 324.12 MB/sec 0.000256 618.51 MB/sec 0.000512 1137.13 MB/sec 0.001024 1962.95 MB/sec 0.001437 2458.27 MB/sec 10.000000 6168.08 MB/sec Avg xfer: 3.2KB, 41.8KB in 1.0060 millisecs, 41.52 MB/sec AF_UNIX sock stream bandwidth: 9921.68 MB/sec Pipe bandwidth: 4649.96 MB/sec [ mingo: Prettified the changelog a bit. ] Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nathan Chancellor Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20250309170955.48919-1-ubizjak@gmail.com --- arch/x86/include/asm/atomic.h | 14 +++++++------- arch/x86/include/asm/atomic64_64.h | 14 +++++++------- arch/x86/include/asm/bitops.h | 14 +++++++------- arch/x86/include/asm/cmpxchg.h | 24 ++++++++++++------------ arch/x86/include/asm/cmpxchg_32.h | 4 ++-- arch/x86/include/asm/cmpxchg_64.h | 4 ++-- arch/x86/include/asm/rmwcc.h | 2 +- 7 files changed, 38 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 55b4d24356ea..75743f1dfd4e 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -30,14 +30,14 @@ static __always_inline void arch_atomic_set(atomic_t *v, int i) static __always_inline void arch_atomic_add(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm_inline volatile(LOCK_PREFIX "addl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline void arch_atomic_sub(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "subl %1,%0" + asm_inline volatile(LOCK_PREFIX "subl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } @@ -50,14 +50,14 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) static __always_inline void arch_atomic_inc(atomic_t *v) { - asm volatile(LOCK_PREFIX "incl %0" + asm_inline volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc static __always_inline void arch_atomic_dec(atomic_t *v) { - asm volatile(LOCK_PREFIX "decl %0" + asm_inline volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec @@ -116,7 +116,7 @@ static __always_inline int arch_atomic_xchg(atomic_t *v, int new) static __always_inline void arch_atomic_and(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "andl %1,%0" + asm_inline volatile(LOCK_PREFIX "andl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); @@ -134,7 +134,7 @@ static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) static __always_inline void arch_atomic_or(int i, atomic_t *v) { - asm 
volatile(LOCK_PREFIX "orl %1,%0" + asm_inline volatile(LOCK_PREFIX "orl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); @@ -152,7 +152,7 @@ static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) static __always_inline void arch_atomic_xor(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "xorl %1,%0" + asm_inline volatile(LOCK_PREFIX "xorl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index ae12acae5b06..87b496325b5b 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -22,14 +22,14 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "addq %1,%0" + asm_inline volatile(LOCK_PREFIX "addq %1, %0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "subq %1,%0" + asm_inline volatile(LOCK_PREFIX "subq %1, %0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } @@ -42,7 +42,7 @@ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_inc(atomic64_t *v) { - asm volatile(LOCK_PREFIX "incq %0" + asm_inline volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } @@ -50,7 +50,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) static __always_inline void arch_atomic64_dec(atomic64_t *v) { - asm volatile(LOCK_PREFIX "decq %0" + asm_inline volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } @@ -110,7 +110,7 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "andq %1,%0" + asm_inline volatile(LOCK_PREFIX "andq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); @@ -128,7 +128,7 @@ static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "orq %1,%0" + asm_inline volatile(LOCK_PREFIX "orq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); @@ -146,7 +146,7 @@ static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "xorq %1,%0" + asm_inline volatile(LOCK_PREFIX "xorq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b96d45944c59..100413aff640 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -52,12 +52,12 @@ static __always_inline void arch_set_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "orb %b1,%0" + asm_inline volatile(LOCK_PREFIX "orb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -72,11 +72,11 @@ static __always_inline void arch_clear_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "andb %b1,%0" + asm_inline volatile(LOCK_PREFIX "andb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX 
__ASM_SIZE(btr) " %1,%0" + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -98,7 +98,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, volatile unsigned long *addr) { bool negative; - asm volatile(LOCK_PREFIX "xorb %2,%1" + asm_inline volatile(LOCK_PREFIX "xorb %2,%1" CC_SET(s) : CC_OUT(s) (negative), WBYTE_ADDR(addr) : "iq" ((char)mask) : "memory"); @@ -122,11 +122,11 @@ static __always_inline void arch_change_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "xorb %b1,%0" + asm_inline volatile(LOCK_PREFIX "xorb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 5612648b0202..5a9acaa6a56b 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -44,22 +44,22 @@ extern void __add_wrong_size(void) __typeof__ (*(ptr)) __ret = (arg); \ switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ - asm volatile (lock #op "b %b0, %1\n" \ + asm_inline volatile (lock #op "b %b0, %1" \ : "+q" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_W: \ - asm volatile (lock #op "w %w0, %1\n" \ + asm_inline volatile (lock #op "w %w0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_L: \ - asm volatile (lock #op "l %0, %1\n" \ + asm_inline volatile (lock #op "l %0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_Q: \ - asm volatile (lock #op "q %q0, %1\n" \ + asm_inline volatile (lock #op "q %q0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ @@ -91,7 +91,7 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile(lock "cmpxchgb %2,%1" \ + asm_inline volatile(lock "cmpxchgb %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ @@ -100,7 +100,7 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile(lock "cmpxchgw %2,%1" \ + asm_inline volatile(lock "cmpxchgw %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -109,7 +109,7 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile(lock "cmpxchgl %2,%1" \ + asm_inline volatile(lock "cmpxchgl %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -118,7 +118,7 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(ptr); \ - asm volatile(lock "cmpxchgq %2,%1" \ + asm_inline volatile(lock "cmpxchgq %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -165,7 +165,7 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(_ptr); \ - asm volatile(lock "cmpxchgb %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ @@ -177,7 +177,7 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(_ptr); \ - asm volatile(lock "cmpxchgw %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgw 
%[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ @@ -189,7 +189,7 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(_ptr); \ - asm volatile(lock "cmpxchgl %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ @@ -201,7 +201,7 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(_ptr); \ - asm volatile(lock "cmpxchgq %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ee89fbc4dd4b..3c0c6d21226f 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -19,7 +19,7 @@ union __u64_halves { union __u64_halves o = { .full = (_old), }, \ n = { .full = (_new), }; \ \ - asm volatile(_lock "cmpxchg8b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ : [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ @@ -45,7 +45,7 @@ static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new n = { .full = (_new), }; \ bool ret; \ \ - asm volatile(_lock "cmpxchg8b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ CC_SET(e) \ : CC_OUT(e) (ret), \ [ptr] "+m" (*(_ptr)), \ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 5e241306db26..71d1e72ed879 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -38,7 +38,7 @@ union __u128_halves { union __u128_halves o = { .full = (_old), }, \ n = { .full = (_new), }; \ \ - asm volatile(_lock "cmpxchg16b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ : [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ @@ -65,7 +65,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, n = { .full = (_new), }; \ bool ret; \ \ - asm volatile(_lock "cmpxchg16b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ CC_SET(e) \ : CC_OUT(e) (ret), \ [ptr] "+m" (*(_ptr)), \ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 363266cbcada..3821ee3fae35 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -29,7 +29,7 @@ cc_label: c = true; \ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c; \ - asm volatile (fullop CC_SET(cc) \ + asm_inline volatile (fullop CC_SET(cc) \ : [var] "+m" (_var), CC_OUT(cc) (c) \ : __VA_ARGS__ : clobbers); \ c; \ -- 2.51.0 From 8a141be3233af7d4f7014ebc44d5452d46b2b1be Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 10 Mar 2025 11:42:56 +0100 Subject: [PATCH 11/16] x86/headers: Replace __ASSEMBLY__ with __ASSEMBLER__ in UAPI headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for UAPI headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. Signed-off-by: Thomas Huth Signed-off-by: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Linus Torvalds Cc: Kees Cook Cc: Brian Gerst Link: https://lore.kernel.org/r/20250310104256.123527-1-thuth@redhat.com --- arch/x86/include/uapi/asm/bootparam.h | 4 ++-- arch/x86/include/uapi/asm/e820.h | 4 ++-- arch/x86/include/uapi/asm/ldt.h | 4 ++-- arch/x86/include/uapi/asm/msr.h | 4 ++-- arch/x86/include/uapi/asm/ptrace-abi.h | 6 +++--- arch/x86/include/uapi/asm/ptrace.h | 4 ++-- arch/x86/include/uapi/asm/setup_data.h | 4 ++-- arch/x86/include/uapi/asm/signal.h | 8 ++++---- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 9b82eebd7add..dafbf581c515 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -26,7 +26,7 @@ #define XLF_5LEVEL_ENABLED (1<<6) #define XLF_MEM_ENCRYPTION (1<<7) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -210,6 +210,6 @@ enum x86_hardware_subarch { X86_NR_SUBARCHS, }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 2f491efe3a12..55bc66867156 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -54,7 +54,7 @@ */ #define E820_RESERVED_KERN 128 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include struct e820entry { __u64 addr; /* start of memory segment */ @@ -76,7 +76,7 @@ struct e820map { #define BIOS_ROM_BASE 0xffe00000 #define BIOS_ROM_END 0xffffffff -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_E820_H */ diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index d62ac5db093b..a82c039d8e6a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -12,7 +12,7 @@ /* The size of each LDT entry. */ #define LDT_ENTRY_SIZE 8 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Note on 64bit base and limit is ignored and you cannot set DS/ES/CS * not to the default values if you still want to do syscalls. This @@ -44,5 +44,5 @@ struct user_desc { #define MODIFY_LDT_CONTENTS_STACK 1 #define MODIFY_LDT_CONTENTS_CODE 2 -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_LDT_H */ diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h index e7516b402a00..4b8917ca28fe 100644 --- a/arch/x86/include/uapi/asm/msr.h +++ b/arch/x86/include/uapi/asm/msr.h @@ -2,7 +2,7 @@ #ifndef _UAPI_ASM_X86_MSR_H #define _UAPI_ASM_X86_MSR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -10,5 +10,5 @@ #define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) #define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_MSR_H */ diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h index 16074b9c93bb..5823584dea13 100644 --- a/arch/x86/include/uapi/asm/ptrace-abi.h +++ b/arch/x86/include/uapi/asm/ptrace-abi.h @@ -25,7 +25,7 @@ #else /* __i386__ */ -#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS) /* * C ABI says these regs are callee-preserved. They aren't saved on kernel entry * unless syscall needs a complete, fully filled "struct pt_regs". 
@@ -57,7 +57,7 @@ #define EFLAGS 144 #define RSP 152 #define SS 160 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* top of stack page */ #define FRAME_SIZE 168 @@ -87,7 +87,7 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #endif diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h index 85165c0edafc..e0b5b4f6226b 100644 --- a/arch/x86/include/uapi/asm/ptrace.h +++ b/arch/x86/include/uapi/asm/ptrace.h @@ -7,7 +7,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ /* this struct defines the way the registers are stored on the @@ -81,6 +81,6 @@ struct pt_regs { -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h index b111b0c18544..50c45ead4e7c 100644 --- a/arch/x86/include/uapi/asm/setup_data.h +++ b/arch/x86/include/uapi/asm/setup_data.h @@ -18,7 +18,7 @@ #define SETUP_INDIRECT (1<<31) #define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -78,6 +78,6 @@ struct ima_setup_data { __u64 size; } __attribute__((packed)); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h index f777346450ec..1067efabf18b 100644 --- a/arch/x86/include/uapi/asm/signal.h +++ b/arch/x86/include/uapi/asm/signal.h @@ -2,7 +2,7 @@ #ifndef _UAPI_ASM_X86_SIGNAL_H #define _UAPI_ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -16,7 +16,7 @@ struct siginfo; typedef unsigned long sigset_t; #endif /* __KERNEL__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define SIGHUP 1 @@ -68,7 +68,7 @@ typedef unsigned long sigset_t; #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ # ifndef __KERNEL__ @@ -106,6 +106,6 @@ typedef struct sigaltstack { __kernel_size_t ss_size; } stack_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_SIGNAL_H */ -- 2.51.0 From 24a295e4ef1ca8e97d8b7015e1887b6e83e1c8be Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 19 Mar 2025 11:30:57 +0100 Subject: [PATCH 12/16] x86/headers: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-UAPI headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with UAPI headers that rather should use __ASSEMBLER__ instead. So let's standardize on the __ASSEMBLER__ macro that is provided by the compilers now. This is mostly a mechanical patch (done with a simple "sed -i" statement), with some manual tweaks in , and that mentioned this macro in comments with some missing underscores. Signed-off-by: Thomas Huth Signed-off-by: Ingo Molnar Cc: Brian Gerst Cc: Juergen Gross Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250314071013.1575167-38-thuth@redhat.com --- arch/x86/boot/boot.h | 4 ++-- arch/x86/entry/vdso/extable.h | 2 +- arch/x86/include/asm/alternative.h | 6 +++--- arch/x86/include/asm/asm.h | 10 +++++----- arch/x86/include/asm/boot.h | 2 +- arch/x86/include/asm/cpufeature.h | 4 ++-- arch/x86/include/asm/cpumask.h | 4 ++-- arch/x86/include/asm/current.h | 4 ++-- arch/x86/include/asm/desc_defs.h | 4 ++-- arch/x86/include/asm/dwarf2.h | 2 +- arch/x86/include/asm/fixmap.h | 4 ++-- arch/x86/include/asm/frame.h | 10 +++++----- arch/x86/include/asm/fred.h | 4 ++-- arch/x86/include/asm/fsgsbase.h | 4 ++-- arch/x86/include/asm/ftrace.h | 8 ++++---- arch/x86/include/asm/hw_irq.h | 4 ++-- arch/x86/include/asm/ibt.h | 12 ++++++------ arch/x86/include/asm/idtentry.h | 6 +++--- arch/x86/include/asm/inst.h | 2 +- arch/x86/include/asm/irqflags.h | 10 +++++----- arch/x86/include/asm/jump_label.h | 4 ++-- arch/x86/include/asm/kasan.h | 2 +- arch/x86/include/asm/kexec.h | 4 ++-- arch/x86/include/asm/linkage.h | 6 +++--- arch/x86/include/asm/mem_encrypt.h | 4 ++-- arch/x86/include/asm/msr.h | 4 ++-- arch/x86/include/asm/nops.h | 2 +- arch/x86/include/asm/nospec-branch.h | 6 +++--- arch/x86/include/asm/orc_types.h | 4 ++-- arch/x86/include/asm/page.h | 4 ++-- arch/x86/include/asm/page_32.h | 4 ++-- arch/x86/include/asm/page_32_types.h | 4 ++-- arch/x86/include/asm/page_64.h | 4 ++-- arch/x86/include/asm/page_64_types.h | 2 +- arch/x86/include/asm/page_types.h | 4 ++-- arch/x86/include/asm/paravirt.h | 14 +++++++------- arch/x86/include/asm/paravirt_types.h | 4 ++-- arch/x86/include/asm/percpu.h | 4 ++-- arch/x86/include/asm/pgtable-2level_types.h | 4 ++-- arch/x86/include/asm/pgtable-3level_types.h | 4 ++-- arch/x86/include/asm/pgtable-invert.h | 4 ++-- arch/x86/include/asm/pgtable.h | 12 ++++++------ arch/x86/include/asm/pgtable_32.h | 4 ++-- arch/x86/include/asm/pgtable_32_areas.h | 2 +- arch/x86/include/asm/pgtable_64.h | 6 +++--- arch/x86/include/asm/pgtable_64_types.h | 4 ++-- arch/x86/include/asm/pgtable_types.h | 10 +++++----- arch/x86/include/asm/prom.h | 4 ++-- arch/x86/include/asm/pti.h | 4 ++-- arch/x86/include/asm/ptrace.h | 4 ++-- arch/x86/include/asm/purgatory.h | 4 ++-- arch/x86/include/asm/pvclock-abi.h | 4 ++-- arch/x86/include/asm/realmode.h | 4 ++-- arch/x86/include/asm/segment.h | 8 ++++---- arch/x86/include/asm/setup.h | 6 +++--- arch/x86/include/asm/setup_data.h | 4 ++-- arch/x86/include/asm/shared/tdx.h | 4 ++-- arch/x86/include/asm/shstk.h | 4 ++-- arch/x86/include/asm/signal.h | 8 ++++---- arch/x86/include/asm/smap.h | 6 +++--- arch/x86/include/asm/smp.h | 4 ++-- arch/x86/include/asm/tdx.h | 4 ++-- arch/x86/include/asm/thread_info.h | 12 ++++++------ arch/x86/include/asm/unwind_hints.h | 4 ++-- arch/x86/include/asm/vdso/getrandom.h | 4 ++-- arch/x86/include/asm/vdso/gettimeofday.h | 4 ++-- arch/x86/include/asm/vdso/processor.h | 4 ++-- arch/x86/include/asm/vdso/vsyscall.h | 4 ++-- arch/x86/include/asm/xen/interface.h | 10 +++++----- arch/x86/include/asm/xen/interface_32.h | 4 ++-- arch/x86/include/asm/xen/interface_64.h | 4 ++-- arch/x86/math-emu/control_w.h | 2 +- arch/x86/math-emu/exception.h | 6 +++--- arch/x86/math-emu/fpu_emu.h | 6 +++--- arch/x86/math-emu/status_w.h | 6 +++--- arch/x86/realmode/rm/realmode.h | 4 ++-- arch/x86/realmode/rm/wakeup.h | 2 +- tools/arch/x86/include/asm/asm.h | 8 ++++---- tools/arch/x86/include/asm/nops.h | 2 +- tools/arch/x86/include/asm/orc_types.h | 4 ++-- 
tools/arch/x86/include/asm/pvclock-abi.h | 4 ++-- 81 files changed, 201 insertions(+), 201 deletions(-) diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 0f24f7ebec9b..38f17a1e1e36 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -16,7 +16,7 @@ #define STACK_SIZE 1024 /* Minimum number of bytes for stack */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -327,6 +327,6 @@ void probe_cards(int unsafe); /* video-vesa.c */ void vesa_store_edid(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/x86/entry/vdso/extable.h b/arch/x86/entry/vdso/extable.h index b56f6b012941..baba612b832c 100644 --- a/arch/x86/entry/vdso/extable.h +++ b/arch/x86/entry/vdso/extable.h @@ -7,7 +7,7 @@ * vDSO uses a dedicated handler the addresses are relative to the overall * exception table, not each individual entry. */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ASM_VDSO_EXTABLE_HANDLE(from, to) \ ASM_VDSO_EXTABLE_HANDLE from to diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 6bf1970493a5..69f25e6906b9 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -15,7 +15,7 @@ #define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature)) #define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -277,7 +277,7 @@ static inline int alternatives_text_reserved(void *start, void *end) void BUG_func(void); void nop_func(void); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #ifdef CONFIG_SMP .macro LOCK_PREFIX @@ -360,6 +360,6 @@ void nop_func(void); ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ newinstr_yes, ft_flags -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 975ae7a9397e..cc2881576c2c 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define __ASM_FORM(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, @@ -113,7 +113,7 @@ #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef __pic__ static __always_inline __pure void *rip_rel_ptr(void *p) { @@ -144,7 +144,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) # include /* Exception table entry */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define _ASM_EXTABLE_TYPE(from, to, type) \ .pushsection "__ex_table","a" ; \ @@ -164,7 +164,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) # define _ASM_NOKPROBE(entry) # endif -#else /* ! __ASSEMBLY__ */ +#else /* ! 
__ASSEMBLER__ */ # define DEFINE_EXTABLE_TYPE_REG \ ".macro extable_type_reg type:req reg:req\n" \ @@ -232,7 +232,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) */ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define _ASM_EXTABLE(from, to) \ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 3e5b111e619d..3f02ff6d333d 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -74,7 +74,7 @@ # define BOOT_STACK_SIZE 0x1000 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned int output_len; extern const unsigned long kernel_text_size; extern const unsigned long kernel_total_size; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index fe6994fa58bf..893cbca37fe9 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -4,7 +4,7 @@ #include -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if defined(__KERNEL__) && !defined(__ASSEMBLER__) #include #include @@ -137,5 +137,5 @@ t_no: #define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model -#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */ #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 4acfd57de8f1..70f6b60ad67b 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUMASK_H #define _ASM_X86_CPUMASK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include extern void setup_cpu_local_masks(void); @@ -34,5 +34,5 @@ static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp #define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index dea7d8b854f0..cc4a3f725b37 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -5,7 +5,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -27,6 +27,6 @@ static __always_inline struct task_struct *get_current(void) #define current get_current() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index d440a65af8f3..7e6b9314758a 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -58,7 +58,7 @@ #define DESC_USER (_DESC_DPL(3)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -166,7 +166,7 @@ struct desc_ptr { unsigned long address; } __attribute__((packed)) ; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Boot IDT definitions */ #define BOOT_IDT_ENTRIES 32 diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 430fca13bb56..302e11b15da8 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_DWARF2_H #define _ASM_X86_DWARF2_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h 
index d0dcefb5cc59..4519c9f35ba0 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -31,7 +31,7 @@ /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ #define FIXMAP_PMD_TOP 507 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include @@ -196,5 +196,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index fb42659f6e98..0ab65073c1cc 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -11,7 +11,7 @@ #ifdef CONFIG_FRAME_POINTER -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro FRAME_BEGIN push %_ASM_BP @@ -51,7 +51,7 @@ .endm #endif /* CONFIG_X86_64 */ -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #define FRAME_BEGIN \ "push %" _ASM_BP "\n" \ @@ -82,18 +82,18 @@ static inline unsigned long encode_frame_pointer(struct pt_regs *regs) #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define FRAME_OFFSET __ASM_SEL(4, 8) #else /* !CONFIG_FRAME_POINTER */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro ENCODE_FRAME_POINTER ptregs_offset=0 .endm -#else /* !__ASSEMBLY */ +#else /* !__ASSEMBLER__ */ #define ENCODE_FRAME_POINTER diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index 25ca00bd70e8..2a29e5216881 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -32,7 +32,7 @@ #define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9) #define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_FRED #include @@ -113,6 +113,6 @@ static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { static inline void fred_sync_rsp0(unsigned long rsp0) { } static inline void fred_update_rsp0(void) { } #endif /* CONFIG_X86_FRED */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* ASM_X86_FRED_H */ diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h index 9e7e8ca8e299..02f239569b93 100644 --- a/arch/x86/include/asm/fsgsbase.h +++ b/arch/x86/include/asm/fsgsbase.h @@ -2,7 +2,7 @@ #ifndef _ASM_FSGSBASE_H #define _ASM_FSGSBASE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_64 @@ -80,6 +80,6 @@ extern unsigned long x86_fsgsbase_read_task(struct task_struct *task, #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_FSGSBASE_H */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index f2265246249a..93156ac4ffe0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -22,7 +22,7 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) @@ -106,11 +106,11 @@ struct dyn_arch_ftrace { }; #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void prepare_ftrace_return(unsigned long ip, unsigned long *parent, unsigned long frame_pointer); @@ -154,6 +154,6 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ #endif /* 
!COMPILE_OFFSETS */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index edebf1020e04..162ebd73a698 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -16,7 +16,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -128,6 +128,6 @@ extern char spurious_entries_start[]; typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -#endif /* !ASSEMBLY_ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_HW_IRQ_H */ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 9423a2967f50..28d845257303 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -21,7 +21,7 @@ #define HAS_KERNEL_IBT 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_64 #define ASM_ENDBR "endbr64\n\t" @@ -82,7 +82,7 @@ extern __noendbr bool is_endbr(u32 *val); extern __noendbr u64 ibt_save(bool disable); extern __noendbr void ibt_restore(u64 save); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #ifdef CONFIG_X86_64 #define ENDBR endbr64 @@ -90,13 +90,13 @@ extern __noendbr void ibt_restore(u64 save); #define ENDBR endbr32 #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* !IBT */ #define HAS_KERNEL_IBT 0 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define ASM_ENDBR #define IBT_NOSEAL(name) @@ -108,11 +108,11 @@ static inline bool is_endbr(u32 *val) { return false; } static inline u64 ibt_save(bool disable) { return 0; } static inline void ibt_restore(u64 save) { } -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define ENDBR -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_X86_KERNEL_IBT */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index ad5c68f0509d..a4ec27c67988 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -7,7 +7,7 @@ #define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -474,7 +474,7 @@ static inline void fred_install_sysvec(unsigned int vector, const idtentry_t fun idt_install_sysvec(vector, asm_##function); \ } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ /* * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs. @@ -579,7 +579,7 @@ SYM_CODE_START(spurious_entries_start) SYM_CODE_END(spurious_entries_start) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * The actual entry points. 
Note that DECLARE_IDTENTRY*() serves two diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 438ccd4f3cc4..e48a00b3311d 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -6,7 +6,7 @@ #ifndef X86_ASM_INST_H #define X86_ASM_INST_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define REG_NUM_INVALID 100 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index cf7fc2b8e3ce..abb8374c9ff7 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -4,7 +4,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -79,7 +79,7 @@ static __always_inline void native_local_irq_restore(unsigned long flags) #ifdef CONFIG_PARAVIRT_XXL #include #else -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include static __always_inline unsigned long arch_local_save_flags(void) @@ -133,10 +133,10 @@ static __always_inline unsigned long arch_local_irq_save(void) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_PARAVIRT_XXL */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); @@ -154,6 +154,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags) if (!arch_irqs_disabled_flags(flags)) arch_local_irq_enable(); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 3f1c1d6c0da1..61dd1dee7812 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -7,7 +7,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -55,6 +55,6 @@ l_yes: extern int arch_jump_entry_size(struct jump_entry *entry); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index de75306b932e..d7e33c7f096b 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -23,7 +23,7 @@ (1ULL << (__VIRTUAL_MASK_SHIFT - \ KASAN_SHADOW_SCALE_SHIFT))) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_KASAN void __init kasan_early_init(void); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index e3589d6aec24..5432457d2338 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -13,7 +13,7 @@ # define KEXEC_CONTROL_PAGE_SIZE 4096 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -217,6 +217,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void); #define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 4835c67bb5dd..b51d8a4673f5 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -38,7 +38,7 @@ #define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN) #define SYM_F_ALIGN __FUNC_ALIGN -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk @@ -50,7 +50,7 @@ #endif #endif /* CONFIG_MITIGATION_RETPOLINE */ -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define ASM_RET "jmp __x86_return_thunk\n\t" @@ -62,7 +62,7 
@@ #endif #endif /* CONFIG_MITIGATION_RETPOLINE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index f922b682b9b4..1530ee301dfe 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -10,7 +10,7 @@ #ifndef __X86_MEM_ENCRYPT_H__ #define __X86_MEM_ENCRYPT_H__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -114,6 +114,6 @@ void add_encrypt_protection_map(void); extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 001853541f1e..9397a319d165 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -4,7 +4,7 @@ #include "msr-index.h" -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -397,5 +397,5 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index 1c1b7550fa55..cd94221d8335 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -82,7 +82,7 @@ #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const unsigned char * const x86_nops[]; #endif diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 44c6076fd22b..804b66a7686a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -176,7 +176,7 @@ add $(BITS_PER_LONG/8), %_ASM_SP; \ lfence; -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions @@ -334,7 +334,7 @@ #define CLEAR_BRANCH_HISTORY_VMEXIT #endif -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; @@ -603,6 +603,6 @@ static __always_inline void mds_idle_clear_cpu_buffers(void) mds_clear_cpu_buffers(); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 46d7e06763c9..e0125afa53fb 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -45,7 +45,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* @@ -73,6 +73,6 @@ struct orc_entry { #endif } __packed; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index c9fe207916f4..9265f2fca99a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -14,7 +14,7 @@ #include #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct page; @@ -84,7 +84,7 @@ static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits) return __canonical_address(vaddr, vaddr_bits) == vaddr; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include #include diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 
580d71aca65a..0c623706cb7e 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -4,7 +4,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) #ifdef CONFIG_DEBUG_VIRTUAL @@ -26,6 +26,6 @@ static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 25c32652f404..a9b62e0e6f79 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -63,7 +63,7 @@ */ #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This much address space is reserved for vmalloc() and iomap() @@ -75,6 +75,6 @@ extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); extern void setup_bootmem_allocator(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_DEFS_H */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index b5279f5d5601..d3aab6f4e59a 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -4,7 +4,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -95,7 +95,7 @@ static __always_inline unsigned long task_size_max(void) } #endif /* CONFIG_X86_5LEVEL */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION # define __HAVE_ARCH_GATE_AREA 1 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 06ef25411d62..1faa8f88850a 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #endif diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 974688973cf6..9f77bf03d747 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -43,7 +43,7 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK extern phys_addr_t physical_mask; @@ -66,6 +66,6 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); extern void initmem_init(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 38a632a282d4..bed346bfac89 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -6,7 +6,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct mm_struct; #endif @@ -15,7 +15,7 @@ struct mm_struct; #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include @@ -715,7 +715,7 @@ static __always_inline unsigned long arch_local_irq_save(void) extern void default_banner(void); void native_pv_lock_init(void) __init; -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL @@ -735,18 +735,18 @@ void native_pv_lock_init(void) __init; #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void 
native_pv_lock_init(void) { } #endif #endif /* !CONFIG_PARAVIRT */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_enter_mmap(struct mm_struct *mm) { @@ -764,5 +764,5 @@ static inline void paravirt_set_cap(void) { } #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 127a372dacc9..62912023b46f 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -4,7 +4,7 @@ #ifdef CONFIG_PARAVIRT -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -525,7 +525,7 @@ unsigned long pv_native_read_cr2(void); #define paravirt_nop ((void *)nop_func) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 462d071c87d4..105db2d33c7b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -10,7 +10,7 @@ # define __percpu_rel #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CONFIG_SMP # define __percpu %__percpu_seg: @@ -588,7 +588,7 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 4a12c276b181..66425424ce91 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H #define _ASM_X86_PGTABLE_2LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include typedef unsigned long pteval_t; @@ -16,7 +16,7 @@ typedef union { pteval_t pte; pteval_t pte_low; } pte_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define SHARED_KERNEL_PMD 0 diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 80911349519e..9d5b257d44e3 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H #define _ASM_X86_PGTABLE_3LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include typedef u64 pteval_t; @@ -25,7 +25,7 @@ typedef union { }; pmdval_t pmd; } pmd_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index a0c1525f1b6f..e12e52ae8083 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -2,7 +2,7 @@ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * A clear pte value is special, and doesn't get inverted. 
@@ -36,6 +36,6 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) return val; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 593f10aabd45..7bd6bd6df4a1 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,7 +15,7 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include @@ -973,7 +973,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_X86_32 @@ -982,7 +982,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) # include #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include @@ -1233,12 +1233,12 @@ static inline int pgd_none(pgd_t pgd) } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern int direct_gbpages; void init_mem_mapping(void); @@ -1812,6 +1812,6 @@ bool arch_is_platform_page(u64 paddr); WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ set_pgd(pgdp, pgd); \ }) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 7d4ad8907297..b612cc57a4d3 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -13,7 +13,7 @@ * This file contains the functions and defines necessary to modify and use * the i386 page table tree. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include @@ -45,7 +45,7 @@ do { \ flush_tlb_one_kernel((vaddr)); \ } while (0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * This is used to calculate the .brk reservation for initial pagetables. 
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h index b6355416a15a..921148b42967 100644 --- a/arch/x86/include/asm/pgtable_32_areas.h +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -13,7 +13,7 @@ */ #define VMALLOC_OFFSET (8 * 1024 * 1024) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern bool __vmalloc_start_set; /* set once high_memory is set */ #endif diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index d1426b64c1b9..b89f8f1194a9 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -5,7 +5,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This file contains the functions and defines necessary to modify and use @@ -270,7 +270,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) #include -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define l4_index(x) (((x) >> 39) & 511) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) @@ -291,5 +291,5 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) i = i + 1 ; \ .endr -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index ec68f8369bdc..5bb782d856f2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -4,7 +4,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -44,7 +44,7 @@ static inline bool pgtable_l5_enabled(void) extern unsigned int pgdir_shift; extern unsigned int ptrs_per_p4d; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define SHARED_KERNEL_PMD 0 diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index c90e9c51edb7..b2ed8198d5cd 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -166,7 +166,7 @@ * to have the WB mode at index 0 (all bits clear). This is the default * right now and likely would break too much if changed. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum page_cache_mode { _PAGE_CACHE_MODE_WB = 0, _PAGE_CACHE_MODE_WC = 1, @@ -241,7 +241,7 @@ enum page_cache_mode { #define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) @@ -264,7 +264,7 @@ enum page_cache_mode { #define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) #define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * early identity mapping pte attrib macros. 
@@ -283,7 +283,7 @@ enum page_cache_mode { # include #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -582,6 +582,6 @@ extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long page_flags); extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, unsigned long numpages); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 365798cb4408..5d0dbab85264 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -8,7 +8,7 @@ #ifndef _ASM_X86_PROM_H #define _ASM_X86_PROM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -33,5 +33,5 @@ static inline void x86_flattree_get_config(void) { } extern char cmd_line[COMMAND_LINE_SIZE]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index ab167c96b9ab..88d0a1ab1f77 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION extern void pti_init(void); @@ -11,5 +11,5 @@ extern void pti_finalize(void); static inline void pti_check_boottime_disable(void) { } #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5a83fbd9bc0b..50f75467f73d 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -6,7 +6,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ struct pt_regs { @@ -469,5 +469,5 @@ extern int do_set_thread_area(struct task_struct *p, int idx, # define do_set_thread_area_64(p, s, t) (0) #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h index 5528e9325049..2fee5e9f1ccc 100644 --- a/arch/x86/include/asm/purgatory.h +++ b/arch/x86/include/asm/purgatory.h @@ -2,10 +2,10 @@ #ifndef _ASM_X86_PURGATORY_H #define _ASM_X86_PURGATORY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include extern void purgatory(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 1436226efe3e..b9fece5fc96d 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_ABI_H #define _ASM_X86_PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * These structs MUST NOT be changed. @@ -44,5 +44,5 @@ struct pvclock_wall_clock { #define PVCLOCK_GUEST_STOPPED (1 << 1) /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. 
*/ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 87e5482acd0d..f607081a022a 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -9,7 +9,7 @@ #define TH_FLAGS_SME_ACTIVE_BIT 0 #define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -95,6 +95,6 @@ void reserve_real_mode(void); void load_trampoline_pgtable(void); void init_real_mode(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 9d6411c65920..77d8f49b92bd 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -233,7 +233,7 @@ #define VDSO_CPUNODE_BITS 12 #define VDSO_CPUNODE_MASK 0xfff -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Helper functions to store/load CPU and node numbers */ @@ -265,7 +265,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) *node = (p >> VDSO_CPUNODE_BITS); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef __KERNEL__ @@ -286,7 +286,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) */ #define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); @@ -350,7 +350,7 @@ static inline void __loadsegment_fs(unsigned short value) #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __KERNEL__ */ #endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index a8d676bba5de..ad9212df0ec0 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -27,7 +27,7 @@ #define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */ #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -142,7 +142,7 @@ extern bool builtin_cmdline_added __ro_after_init; #define builtin_cmdline_added 0 #endif -#else /* __ASSEMBLY */ +#else /* __ASSEMBLER__ */ .macro __RESERVE_BRK name, size .pushsection .bss..brk, "aw" @@ -154,6 +154,6 @@ SYM_DATA_END(__brk_\name) #define RESERVE_BRK(name, size) __RESERVE_BRK name, size -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h index 77c51111a893..7bb16f843c93 100644 --- a/arch/x86/include/asm/setup_data.h +++ b/arch/x86/include/asm/setup_data.h @@ -4,7 +4,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct pci_setup_rom { struct setup_data data; @@ -27,6 +27,6 @@ struct efi_setup_data { u64 reserved[8]; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index fcbbef484a78..a28ff6b14145 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -106,7 +106,7 @@ #define TDX_PS_1G 2 #define TDX_PS_NR (TDX_PS_1G + 1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -177,5 +177,5 @@ static __always_inline u64 hcall_func(u64 exit_reason) return 
exit_reason; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_SHARED_TDX_H */ diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h index 4cb77e004615..ba6f2fe43848 100644 --- a/arch/x86/include/asm/shstk.h +++ b/arch/x86/include/asm/shstk.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_SHSTK_H #define _ASM_X86_SHSTK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include struct task_struct; @@ -37,6 +37,6 @@ static inline int shstk_update_last_frame(unsigned long val) { return 0; } static inline bool shstk_is_enabled(void) { return false; } #endif /* CONFIG_X86_USER_SHADOW_STACK */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SHSTK_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 4a4043ca6493..c72d46175374 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_SIGNAL_H #define _ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* Most things should be clean enough to redefine this at will, if care @@ -28,9 +28,9 @@ typedef struct { #define SA_IA32_ABI 0x02000000u #define SA_X32_ABI 0x01000000u -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __ARCH_HAS_SA_RESTORER @@ -101,5 +101,5 @@ struct pt_regs; #endif /* !__i386__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 2de1e5a75c57..daea94c2993c 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -13,7 +13,7 @@ #include #include -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define ASM_CLAC \ ALTERNATIVE "", "clac", X86_FEATURE_SMAP @@ -21,7 +21,7 @@ #define ASM_STAC \ ALTERNATIVE "", "stac", X86_FEATURE_SMAP -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ static __always_inline void clac(void) { @@ -61,6 +61,6 @@ static __always_inline void smap_restore(unsigned long flags) #define ASM_STAC \ ALTERNATIVE("", "stac", X86_FEATURE_SMAP) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SMAP_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index bcfa00232d79..0c1c68039d6f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SMP_H #define _ASM_X86_SMP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -171,7 +171,7 @@ extern void nmi_selftest(void); extern unsigned int smpboot_control; extern unsigned long apic_mmio_base; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Control bits for startup_64 */ #define STARTUP_READ_APICID 0x80000000 diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index b4b16dafd55e..65394aa9b49f 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -30,7 +30,7 @@ #define TDX_SUCCESS 0ULL #define TDX_RND_NO_ENTROPY 0x8000020300000000ULL -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -126,5 +126,5 @@ static inline int tdx_enable(void) { return -ENODEV; } static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } #endif /* CONFIG_INTEL_TDX_HOST */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a55c214f3ba6..9282465eea21 100644 --- a/arch/x86/include/asm/thread_info.h +++ 
b/arch/x86/include/asm/thread_info.h @@ -54,7 +54,7 @@ * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; #include #include @@ -73,7 +73,7 @@ struct thread_info { .flags = 0, \ } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #include @@ -161,7 +161,7 @@ struct thread_info { * * preempt_count needs to be 1 initially, until the scheduler is functional. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Walks up the stack frames to make sure that the specified object is @@ -213,7 +213,7 @@ static inline int arch_within_stack_frames(const void * const stack, #endif } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Thread-synchronous status. @@ -224,7 +224,7 @@ static inline int arch_within_stack_frames(const void * const stack, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ @@ -242,6 +242,6 @@ static inline int arch_within_stack_frames(const void * const stack, extern void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 85cc57cb6539..8f4579c5a6f8 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -5,7 +5,7 @@ #include "orc_types.h" -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro UNWIND_HINT_END_OF_STACK UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK @@ -88,6 +88,6 @@ #define UNWIND_HINT_RESTORE \ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h index 2bf9c0e970c3..785f8edcb9c9 100644 --- a/arch/x86/include/asm/vdso/getrandom.h +++ b/arch/x86/include/asm/vdso/getrandom.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -37,6 +37,6 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void return &vdso_rng_data; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 375a34b0f365..428f3f4c2235 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -10,7 +10,7 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -350,6 +350,6 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, } #define vdso_calc_ns vdso_calc_ns -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h index 2cbce97d29ea..c9b2ba7a9ec4 100644 --- a/arch/x86/include/asm/vdso/processor.h +++ b/arch/x86/include/asm/vdso/processor.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. 
*/ static __always_inline void rep_nop(void) @@ -22,6 +22,6 @@ struct getcpu_cache; notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index 37b4a70559a8..72aedebb7648 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -9,7 +9,7 @@ #define VDSO_PAGE_PVCLOCK_OFFSET 0 #define VDSO_PAGE_HVCLOCK_OFFSET 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -36,6 +36,6 @@ struct vdso_rng_data *__x86_get_k_vdso_rng_data(void) /* The asm-generic header needs to be included after the definitions above */ #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index baca0b00ef76..a078a2b0f032 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -72,7 +72,7 @@ #endif #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Explicitly size integers that represent pfns in the public interface * with Xen so that on ARM we can have one ABI that works for 32 and 64 * bit guests. */ @@ -137,7 +137,7 @@ DEFINE_GUEST_HANDLE(xen_ulong_t); #define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ @@ -186,7 +186,7 @@ struct arch_shared_info { uint32_t wc_sec_hi; #endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_32 #include @@ -196,7 +196,7 @@ struct arch_shared_info { #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. @@ -376,7 +376,7 @@ struct xen_pmu_arch { } c; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Prefix forces emulation of some non-trapping instructions. diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index dc40578abded..74d9768a9cf7 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -44,7 +44,7 @@ */ #define __HYPERVISOR_VIRT_START 0xF5800000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct cpu_user_regs { uint32_t ebx; @@ -85,7 +85,7 @@ typedef struct xen_callback xen_callback_t; #define XEN_CALLBACK(__cs, __eip) \ ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index c10f279aae93..38a19edb81a3 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -77,7 +77,7 @@ #define VGCF_in_syscall (1<<_VGCF_in_syscall) #define VGCF_IN_SYSCALL VGCF_in_syscall -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct iret_context { /* Top of stack (%rsp at point of hypercall). 
*/ @@ -143,7 +143,7 @@ typedef unsigned long xen_callback_t; #define XEN_CALLBACK(__cs, __rip) \ ((unsigned long)(__rip)) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_XEN_INTERFACE_64_H */ diff --git a/arch/x86/math-emu/control_w.h b/arch/x86/math-emu/control_w.h index 60f4dcc5edc3..93cbc89b34e2 100644 --- a/arch/x86/math-emu/control_w.h +++ b/arch/x86/math-emu/control_w.h @@ -11,7 +11,7 @@ #ifndef _CONTROLW_H_ #define _CONTROLW_H_ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _Const_(x) $##x #else #define _Const_(x) x diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h index 75230b977577..59961d350bc4 100644 --- a/arch/x86/math-emu/exception.h +++ b/arch/x86/math-emu/exception.h @@ -10,7 +10,7 @@ #ifndef _EXCEPTION_H_ #define _EXCEPTION_H_ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define Const_(x) $##x #else #define Const_(x) x @@ -37,7 +37,7 @@ #define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) #define PRECISION_LOST_DOWN Const_(EX_Precision) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef DEBUG #define EXCEPTION(x) { printk("exception in %s at line %d\n", \ @@ -46,6 +46,6 @@ #define EXCEPTION(x) FPU_exception(x) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _EXCEPTION_H_ */ diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h index 0c122226ca56..def569c50b76 100644 --- a/arch/x86/math-emu/fpu_emu.h +++ b/arch/x86/math-emu/fpu_emu.h @@ -20,7 +20,7 @@ */ #define PECULIAR_486 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include "fpu_asm.h" #define Const(x) $##x #else @@ -68,7 +68,7 @@ #define FPU_Exception Const(0x80000000) /* Added to tag returns. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include "fpu_system.h" @@ -213,6 +213,6 @@ asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, #include "fpu_proto.h" #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _FPU_EMU_H_ */ diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h index b77bafec9526..f642957330ef 100644 --- a/arch/x86/math-emu/status_w.h +++ b/arch/x86/math-emu/status_w.h @@ -13,7 +13,7 @@ #include "fpu_emu.h" /* for definition of PECULIAR_486 */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define Const__(x) $##x #else #define Const__(x) x @@ -37,7 +37,7 @@ #define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define COMP_A_gt_B 1 #define COMP_A_eq_B 2 @@ -63,6 +63,6 @@ static inline void setcc(int cc) # define clear_C1() #endif /* PECULIAR_486 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _STATUS_H_ */ diff --git a/arch/x86/realmode/rm/realmode.h b/arch/x86/realmode/rm/realmode.h index c76041a35397..867e55f1d6af 100644 --- a/arch/x86/realmode/rm/realmode.h +++ b/arch/x86/realmode/rm/realmode.h @@ -2,7 +2,7 @@ #ifndef ARCH_X86_REALMODE_RM_REALMODE_H #define ARCH_X86_REALMODE_RM_REALMODE_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* * 16-bit ljmpw to the real_mode_seg @@ -12,7 +12,7 @@ */ #define LJMPW_RM(to) .byte 0xea ; .word (to), real_mode_seg -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Signature at the end of the realmode region diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h index 0e4fd08ae447..3b6d8fa82d3e 100644 --- a/arch/x86/realmode/rm/wakeup.h +++ b/arch/x86/realmode/rm/wakeup.h @@ -7,7 +7,7 @@ #ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H #define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H -#ifndef __ASSEMBLY__ +#ifndef 
__ASSEMBLER__ #include /* This must match data at wakeup.S */ diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h index 3ad3da9a7d97..dbe39b44256b 100644 --- a/tools/arch/x86/include/asm/asm.h +++ b/tools/arch/x86/include/asm/asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define __ASM_FORM(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, @@ -123,7 +123,7 @@ #ifdef __KERNEL__ /* Exception table entry */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ .pushsection "__ex_table","a" ; \ .balign 4 ; \ @@ -154,7 +154,7 @@ # define _ASM_NOKPROBE(entry) # endif -#else /* ! __ASSEMBLY__ */ +#else /* ! __ASSEMBLER__ */ # define _EXPAND_EXTABLE_HANDLE(x) #x # define _ASM_EXTABLE_HANDLE(from, to, handler) \ " .pushsection \"__ex_table\",\"a\"\n" \ @@ -186,7 +186,7 @@ */ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __KERNEL__ */ diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h index 1c1b7550fa55..cd94221d8335 100644 --- a/tools/arch/x86/include/asm/nops.h +++ b/tools/arch/x86/include/asm/nops.h @@ -82,7 +82,7 @@ #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const unsigned char * const x86_nops[]; #endif diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 46d7e06763c9..e0125afa53fb 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -45,7 +45,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* @@ -73,6 +73,6 @@ struct orc_entry { #endif } __packed; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/arch/x86/include/asm/pvclock-abi.h b/tools/arch/x86/include/asm/pvclock-abi.h index 1436226efe3e..b9fece5fc96d 100644 --- a/tools/arch/x86/include/asm/pvclock-abi.h +++ b/tools/arch/x86/include/asm/pvclock-abi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_ABI_H #define _ASM_X86_PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * These structs MUST NOT be changed. @@ -44,5 +44,5 @@ struct pvclock_wall_clock { #define PVCLOCK_GUEST_STOPPED (1 << 1) /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ -- 2.51.0 From 8b70c7436f51ac0f4702b466e1d9db938944e641 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Tue, 18 Mar 2025 22:38:27 +0000 Subject: [PATCH 13/16] perf/x86/intel, x86/cpu: Simplify Intel PMU initialization Architectural Perfmon was introduced on the Family 6 "Core" processors starting with Yonah. Processors before Yonah need their own customized PMU initialization. p6_pmu_init() is expected to provide that initialization for early Family 6 processors. But, currently, it could get called for any Family 6 processor if the architectural perfmon feature is disabled on that processor. To simplify, restrict the P6 PMU initialization to early Family 6 processors that do not have architectural perfmon support and truly need the special handling. 
As a result, the "unsupported" console print becomes practically unreachable because all the released P6 processors are covered by the switch cases. Move the console print to a common location where it can cover all modern processors (including Family >15) that may not have architectural perfmon support enumerated. Also, use this opportunity to get rid of the unnecessary switch cases in P6 initialization. Only the Pentium Pro processor needs a quirk, and the rest of the processors do not need any special handling. The gaps in the case numbers are only due to no processor with those model numbers being released. Use decimal numbers to represent Intel Family numbers. Also, convert one of the last few Intel x86_model comparisons to a VFM-based one. Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250318223828.2945651-2-sohil.mehta@intel.com --- arch/x86/events/intel/core.c | 14 ++++++++++---- arch/x86/events/intel/p6.c | 26 +++----------------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 40a62bf6ef0a..49a1155e6cb9 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6541,15 +6541,21 @@ __init int intel_pmu_init(void) char *name; struct x86_hybrid_pmu *pmu; + /* Architectural Perfmon was introduced starting with Core "Yonah" */ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { - case 0x6: - return p6_pmu_init(); - case 0xb: + case 6: + if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH) + return p6_pmu_init(); + break; + case 11: return knc_pmu_init(); - case 0xf: + case 15: return p4_pmu_init(); } + + pr_cont("unsupported CPU family %d model %d ", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a6cffb4f4ef5..65b45e9d7016 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -2,6 +2,8 @@ #include #include +#include + #include "../perf_event.h" /* @@ -248,30 +250,8 @@ __init int p6_pmu_init(void) { x86_pmu = p6_pmu; - switch (boot_cpu_data.x86_model) { - case 1: /* Pentium Pro */ + if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO) x86_add_quirk(p6_pmu_rdpmc_quirk); - break; - - case 3: /* Pentium II - Klamath */ - case 5: /* Pentium II - Deschutes */ - case 6: /* Pentium II - Mendocino */ - break; - - case 7: /* Pentium III - Katmai */ - case 8: /* Pentium III - Coppermine */ - case 10: /* Pentium III Xeon */ - case 11: /* Pentium III - Tualatin */ - break; - - case 9: /* Pentium M - Banias */ - case 13: /* Pentium M - Dothan */ - break; - - default: - pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); - return -ENODEV; - } memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, sizeof(hw_cache_event_ids)); -- 2.51.0 From de844ef582e3a5e0cbd429c68b6079eeb87394e5 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Tue, 18 Mar 2025 22:38:28 +0000 Subject: [PATCH 14/16] perf/x86/intel, x86/cpu: Replace Pentium 4 model checks with VFM ones Introduce a name for an old Pentium 4 model and replace the x86_model checks with VFM ones. This gets rid of one of the last remaining Intel-specific x86_model checks. 
Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250318223828.2945651-3-sohil.mehta@intel.com --- arch/x86/events/intel/p4.c | 7 ++++--- arch/x86/include/asm/intel-family.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 844bc4fc4724..fb726c6fc6e7 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -732,9 +733,9 @@ static bool p4_event_match_cpu_model(unsigned int event_idx) { /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ if (event_idx == P4_EVENT_INSTR_COMPLETED) { - if (boot_cpu_data.x86_model != 3 && - boot_cpu_data.x86_model != 4 && - boot_cpu_data.x86_model != 6) + if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT && + boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M && + boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL) return false; } diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 6cd08da64684..3a97a7eefb51 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -193,6 +193,7 @@ /* Family 15 - NetBurst */ #define INTEL_P4_WILLAMETTE IFM(15, 0x01) /* Also Xeon Foster */ #define INTEL_P4_PRESCOTT IFM(15, 0x03) +#define INTEL_P4_PRESCOTT_2M IFM(15, 0x04) #define INTEL_P4_CEDARMILL IFM(15, 0x06) /* Also Xeon Dempsey */ /* Family 19 */ -- 2.51.0 From 0b7eb55cb706e92d6073e4ab63ccd4d219cf2cda Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 19 Mar 2025 13:25:20 -0400 Subject: [PATCH 15/16] x86/mm: Only do broadcast flush from reclaim if pages were unmapped Track whether pages were unmapped from any MM (even ones with a currently empty mm_cpumask) by the reclaim code, to figure out whether or not broadcast TLB flush should be done when reclaim finishes. The reason any MM must be tracked, and not only ones contributing to the tlbbatch cpumask, is that broadcast ASIDs are expected to be kept up to date even on CPUs where the MM is not currently active. This change allows reclaim to avoid doing TLB flushes when only clean page cache pages and/or slab memory were reclaimed, which is fairly common. ( This is a simpler alternative to the code that was in my INVLPGB series before, and it seems to capture most of the benefit due to how common it is to reclaim only page cache. ) Signed-off-by: Rik van Riel Signed-off-by: Ingo Molnar Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250319132520.6b10ad90@fangorn --- arch/x86/include/asm/tlbbatch.h | 5 +++++ arch/x86/include/asm/tlbflush.h | 1 + arch/x86/mm/tlb.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h index 1ad56eb3e8a8..80aaf64ff25f 100644 --- a/arch/x86/include/asm/tlbbatch.h +++ b/arch/x86/include/asm/tlbbatch.h @@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch { * the PFNs being flushed.. */ struct cpumask cpumask; + /* + * Set if pages were unmapped from any MM, even one that does not + * have active CPUs in its cpumask. 
+ */ + bool unmapped_pages; }; #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 7cad283d502d..a9af8759de34 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -353,6 +353,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b { inc_mm_tlb_gen(mm); cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + batch->unmapped_pages = true; mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0efd99053c09..0925768d00cb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -1633,8 +1633,9 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. */ - if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->unmapped_pages) { invlpgb_flush_all_nonglobals(); + batch->unmapped_pages = false; } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { flush_tlb_multi(&batch->cpumask, info); } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { -- 2.51.0 From 3e57612561138d7142721a83743fb8eb2bf09ec5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Mar 2025 22:32:39 +0100 Subject: [PATCH 16/16] x86/asm: Make asm export of __ref_stack_chk_guard unconditional Clang does not tolerate the use of non-TLS symbols for the per-CPU stack protector very well, and to work around this limitation, the symbol passed via the -mstack-protector-guard-symbol= option is never defined in C code, but only in the linker script, and it is exported from an assembly file. This is necessary because Clang will fail to generate the correct %GS based references in a compilation unit that includes a non-TLS definition of the guard symbol being used to store the stack cookie. This problem is only triggered by symbol definitions, not by declarations, but nonetheless, the declaration in is conditional on __GENKSYMS__ being #define'd, so that only genksyms will observe it, but for ordinary compilation, it will be invisible. This is causing problems with the genksyms alternative gendwarfksyms, which does not #define __GENKSYMS__, does not observe the symbol declaration, and therefore lacks the information it needs to version it. Adding the #define creates problems in other places, so that is not a straight-forward solution. So take the easy way out, and drop the conditional on __GENKSYMS__, as this is not really needed to begin with. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20250320213238.4451-2-ardb@kernel.org --- arch/x86/include/asm/asm-prototypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 8d9e62725202..11c6fecc3ad7 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -20,6 +20,6 @@ extern void cmpxchg8b_emu(void); #endif -#if defined(__GENKSYMS__) && defined(CONFIG_STACKPROTECTOR) +#ifdef CONFIG_STACKPROTECTOR extern unsigned long __ref_stack_chk_guard; #endif -- 2.51.0