From 0ad2507d5d93f39619fc42372c347d6006b64319 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Feb 2025 14:02:44 -0800 Subject: [PATCH 01/16] Linux 6.14-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 272db408be5c..96407c1d6be1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From a3e8fe814ad15c16735cdf394454a8bd96eb4d56 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:33 -0500 Subject: [PATCH 02/16] x86/build: Raise the minimum GCC version to 8.1 Stack protector support on 64-bit currently requires that the percpu section is linked at absolute address 0, because older compilers fixed the location of the canary value relative to the GS segment base. GCC 8.1 introduced options to change where the canary value is located, allowing it to be configured as a standard per-CPU variable. This has already been done for 32-bit. Doing the same for 64-bit will enable removing the code needed to support zero-based percpu. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-2-brgerst@gmail.com --- scripts/min-tool-version.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 91c91201212c..06c4e410ecab 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -19,6 +19,8 @@ binutils) gcc) if [ "$ARCH" = parisc64 ]; then echo 12.0.0 + elif [ "$SRCARCH" = x86 ]; then + echo 8.1.0 else echo 5.1.0 fi -- 2.51.0 From 0ee2689b9374d6fd5f43b703713a532278654749 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:34 -0500 Subject: [PATCH 03/16] x86/stackprotector: Remove stack protector test scripts With GCC 8.1 now the minimum supported compiler for x86, these scripts are no longer needed. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Reviewed-by: Uros Bizjak Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-3-brgerst@gmail.com --- arch/x86/Kconfig | 11 +---------- scripts/gcc-x86_32-has-stack-protector.sh | 8 -------- scripts/gcc-x86_64-has-stack-protector.sh | 4 ---- 3 files changed, 1 insertion(+), 22 deletions(-) delete mode 100755 scripts/gcc-x86_32-has-stack-protector.sh delete mode 100755 scripts/gcc-x86_64-has-stack-protector.sh diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be2c311f5118..6595b35dd52d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -285,7 +285,7 @@ config X86 select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_SETUP_PER_CPU_AREA select HAVE_SOFTIRQ_ON_OWN_STACK - select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR + select HAVE_STACKPROTECTOR select HAVE_STACK_VALIDATION if HAVE_OBJTOOL select HAVE_STATIC_CALL select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL @@ -426,15 +426,6 @@ config PGTABLE_LEVELS default 3 if X86_PAE default 2 -config CC_HAS_SANE_STACKPROTECTOR - bool - default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT - default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) - help - We have to make sure stack protector is unconditionally disabled if - the compiler produces broken code or if it does not let us control - the segment on 32-bit kernels. 
- menu "Processor type and features" config SMP diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh deleted file mode 100755 index 9459ca4f0f11..000000000000 --- a/scripts/gcc-x86_32-has-stack-protector.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# This requires GCC 8.1 or better. Specifically, we require -# -mstack-protector-guard-reg, added by -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708 - -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs" diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh deleted file mode 100755 index f680bb01aeeb..000000000000 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" -- 2.51.0 From a9a76b38aaf577887103e3ebb41d70e6aa5a4b19 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:35 -0500 Subject: [PATCH 04/16] x86/boot: Disable stack protector for early boot code On 64-bit, this will prevent crashes when the canary access is changed from %gs:40 to %gs:__stack_chk_guard(%rip). RIP-relative addresses from the identity-mapped early boot code will target the wrong address with zero-based percpu. KASLR could then shift that address to an unmapped page causing a crash on boot. This early boot code runs well before user-space is active and does not need stack protector enabled. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-4-brgerst@gmail.com --- arch/x86/kernel/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b43eb7e384eb..84cfa179802c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -44,6 +44,8 @@ KCOV_INSTRUMENT_unwind_orc.o := n KCOV_INSTRUMENT_unwind_frame.o := n KCOV_INSTRUMENT_unwind_guess.o := n +CFLAGS_head32.o := -fno-stack-protector +CFLAGS_head64.o := -fno-stack-protector CFLAGS_irq.o := -I $(src)/../include/asm/trace obj-y += head_$(BITS).o -- 2.51.0 From f58b63857ae38b4484185b799a2759274b930c92 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:36 -0500 Subject: [PATCH 05/16] x86/pvh: Use fixed_percpu_data for early boot GSBASE Instead of having a private area for the stack canary, use fixed_percpu_data for GSBASE like the native kernel. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-5-brgerst@gmail.com --- arch/x86/platform/pvh/head.S | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 4733a5f467b8..723f181b222a 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -173,10 +173,15 @@ SYM_CODE_START(pvh_start_xen) 1: UNWIND_HINT_END_OF_STACK - /* Set base address in stack canary descriptor. */ - mov $MSR_GS_BASE,%ecx - leal canary(%rip), %eax - xor %edx, %edx + /* + * Set up GSBASE. 
+	 * Note that on SMP the boot CPU uses the init data section until
+	 * the per-CPU areas are set up.
+	 */
+	movl $MSR_GS_BASE,%ecx
+	leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+	movl %edx, %eax
+	shrq $32, %rdx
 	wrmsr
 
 	/* Call xen_prepare_pvh() via the kernel virtual mapping */
@@ -238,8 +243,6 @@ SYM_DATA_START_LOCAL(gdt_start)
 SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
 
 .balign 16
-SYM_DATA_LOCAL(canary, .fill 48, 1, 0)
-
 SYM_DATA_START_LOCAL(early_stack)
 	.fill BOOT_STACK_SIZE, 1, 0
 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
-- 
2.51.0

From cb7927fda002ca49ae62e2782c1692acc7b80c67 Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Thu, 23 Jan 2025 14:07:37 -0500
Subject: [PATCH 06/16] x86/relocs: Handle R_X86_64_REX_GOTPCRELX relocations

Clang may produce R_X86_64_REX_GOTPCRELX relocations when redefining the
stack protector location. Treat them as another type of PC-relative
relocation.

Signed-off-by: Brian Gerst
Signed-off-by: Ingo Molnar
Reviewed-by: Ard Biesheuvel
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20250123190747.745588-6-brgerst@gmail.com
---
 arch/x86/tools/relocs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index e937be979ec8..92a1e503305e 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -32,6 +32,11 @@ static struct relocs relocs32;
 static struct relocs relocs32neg;
 static struct relocs relocs64;
 # define FMT PRIu64
+
+#ifndef R_X86_64_REX_GOTPCRELX
+# define R_X86_64_REX_GOTPCRELX 42
+#endif
+
 #else
 # define FMT PRIu32
 #endif
@@ -227,6 +232,7 @@ static const char *rel_type(unsigned type)
 		REL_TYPE(R_X86_64_PC16),
 		REL_TYPE(R_X86_64_8),
 		REL_TYPE(R_X86_64_PC8),
+		REL_TYPE(R_X86_64_REX_GOTPCRELX),
 #else
 		REL_TYPE(R_386_NONE),
 		REL_TYPE(R_386_32),
@@ -861,6 +867,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
 
 	case R_X86_64_PC32:
 	case R_X86_64_PLT32:
+	case R_X86_64_REX_GOTPCRELX:
 		/*
 		 * PC relative relocations don't need to be adjusted unless
 		 * referencing a percpu symbol.
-- 
2.51.0

From 78c4374ef8b842c6abf195d6f963853c7ec464d2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 23 Jan 2025 14:07:38 -0500
Subject: [PATCH 07/16] x86/module: Deal with GOT based stack cookie load on
 Clang < 17

Clang versions before 17 will not honour -fdirect-access-external-data
for the load of the stack cookie emitted into each function's prologue
and epilogue.

This is not an issue for the core kernel, as the linker will relax
these loads into LEA instructions that take the address of
__stack_chk_guard directly.

For modules, however, we need to work around this, by dealing with
R_X86_64_REX_GOTPCRELX relocations that refer to __stack_chk_guard.

In this case, given that this is a GOT load, the reference should not
refer to __stack_chk_guard directly, but to a memory location that
holds its address. So take the address of __stack_chk_guard into a
static variable, and fix up the relocations to refer to that.

[ mingo: Fix broken R_X86_64_GOTPCRELX definition.
] Signed-off-by: Ard Biesheuvel Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-7-brgerst@gmail.com --- arch/x86/include/asm/elf.h | 5 +++-- arch/x86/kernel/module.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1fb83d47711f..128602612eca 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -54,8 +54,9 @@ typedef struct user_i387_struct elf_fpregset_t; #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ #define R_X86_64_RELATIVE 8 /* Adjust by program base */ -#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative - offset to GOT */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative offset to GOT */ +#define R_X86_64_GOTPCRELX 41 +#define R_X86_64_REX_GOTPCRELX 42 #define R_X86_64_32 10 /* Direct 32 bit zero extended */ #define R_X86_64_32S 11 /* Direct 32 bit sign extended */ #define R_X86_64_16 12 /* Direct 16 bit zero extended */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 8984abd91c00..a286f32c5503 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,20 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, goto overflow; size = 4; break; +#if defined(CONFIG_STACKPROTECTOR) && \ + defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000 + case R_X86_64_REX_GOTPCRELX: { + static unsigned long __percpu *const addr = &__stack_chk_guard; + + if (sym->st_value != (u64)addr) { + pr_err("%s: Unsupported GOTPCREL relocation\n", me->name); + return -ENOEXEC; + } + + val = (u64)&addr + rel[i].r_addend; + fallthrough; + } +#endif case R_X86_64_PC32: case R_X86_64_PLT32: val -= (u64)loc; -- 2.51.0 From 80d47defddc000271502057ebd7efa4fd6481542 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:39 -0500 Subject: [PATCH 08/16] x86/stackprotector/64: Convert to normal per-CPU variable Older versions of GCC fixed the location of the stack protector canary at %gs:40. This constraint forced the percpu section to be linked at absolute address 0 so that the canary could be the first data object in the percpu section. Supporting the zero-based percpu section requires additional code to handle relocations for RIP-relative references to percpu data, extra complexity to kallsyms, and workarounds for linker bugs due to the use of absolute symbols. GCC 8.1 supports redefining where the canary is located, allowing it to become a normal percpu variable instead of at a fixed location. This removes the constraint that the percpu section must be zero-based. 
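As an illustration of the new scheme (not part of this patch): with
-mstack-protector-guard-reg=gs and
-mstack-protector-guard-symbol=__stack_chk_guard, a protected function
conceptually behaves like the sketch below. The actual checks are
emitted by the compiler, the canary load becomes a
%gs:__stack_chk_guard(%rip) access, and __stack_chk_fail() is the
standard failure hook:

	extern unsigned long __stack_chk_guard;	/* now an ordinary per-CPU variable */
	extern void __stack_chk_fail(void);	/* canary mismatch handler, never returns */

	void protected_function(void)
	{
		/* prologue: stash a copy of the canary in the stack frame */
		unsigned long canary = __stack_chk_guard;
		char buf[64];

		/* ... body that could overflow buf ... */

		/* epilogue: recheck the canary before returning */
		if (canary != __stack_chk_guard)
			__stack_chk_fail();
	}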
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Reviewed-by: Uros Bizjak Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-8-brgerst@gmail.com --- arch/x86/Makefile | 20 +++++++++------ arch/x86/entry/entry.S | 2 -- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/processor.h | 16 ++---------- arch/x86/include/asm/stackprotector.h | 36 ++++----------------------- arch/x86/kernel/asm-offsets_64.c | 6 ----- arch/x86/kernel/cpu/common.c | 5 +--- arch/x86/kernel/head_64.S | 3 +-- arch/x86/xen/xen-head.S | 3 +-- 9 files changed, 23 insertions(+), 70 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5b773b34768d..88a1705366f9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -140,14 +140,7 @@ ifeq ($(CONFIG_X86_32),y) # temporary until string.h is fixed KBUILD_CFLAGS += -ffreestanding - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) - KBUILD_CFLAGS += -mstack-protector-guard-reg=fs \ - -mstack-protector-guard-symbol=__ref_stack_chk_guard - else - KBUILD_CFLAGS += -mstack-protector-guard=global - endif - endif + percpu_seg := fs else BITS := 64 UTS_MACHINE := x86_64 @@ -197,6 +190,17 @@ else KBUILD_CFLAGS += -mcmodel=kernel KBUILD_RUSTFLAGS += -Cno-redzone=y KBUILD_RUSTFLAGS += -Ccode-model=kernel + + percpu_seg := gs +endif + +ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) + KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg) + KBUILD_CFLAGS += -mstack-protector-guard-symbol=__ref_stack_chk_guard + else + KBUILD_CFLAGS += -mstack-protector-guard=global + endif endif # diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index b7ea3e8e9ecc..fe5344a249a1 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -52,7 +52,6 @@ EXPORT_SYMBOL_GPL(mds_verw_sel); THUNK warn_thunk_thunk, __warn_thunk -#ifndef CONFIG_X86_64 /* * Clang's implementation of TLS stack cookies requires the variable in * question to be a TLS variable. If the variable happens to be defined as an @@ -66,4 +65,3 @@ THUNK warn_thunk_thunk, __warn_thunk #ifdef CONFIG_STACKPROTECTOR EXPORT_SYMBOL(__ref_stack_chk_guard); #endif -#endif diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f52dbe0ad93c..33a955aa01d8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -192,7 +192,7 @@ SYM_FUNC_START(__switch_to_asm) #ifdef CONFIG_STACKPROTECTOR movq TASK_stack_canary(%rsi), %rbx - movq %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary) + movq %rbx, PER_CPU_VAR(__stack_chk_guard) #endif /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c0cd10182e90..a4687122951f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -422,16 +422,8 @@ struct irq_stack { #ifdef CONFIG_X86_64 struct fixed_percpu_data { - /* - * GCC hardcodes the stack canary as %gs:40. Since the - * irq_stack is the object at %gs:0, we reserve the bottom - * 48 bytes of the irq stack for the canary. - * - * Once we are willing to require -mstack-protector-guard-symbol= - * support for x86_64 stackprotector, we can get rid of this. 
- */ char gs_base[40]; - unsigned long stack_canary; + unsigned long reserved; }; DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; @@ -446,11 +438,7 @@ extern asmlinkage void entry_SYSCALL32_ignore(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); -#else /* X86_64 */ -#ifdef CONFIG_STACKPROTECTOR -DECLARE_PER_CPU(unsigned long, __stack_chk_guard); -#endif -#endif /* !X86_64 */ +#endif /* X86_64 */ struct perf_event; diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 00473a650f51..d43fb589fcf6 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -2,26 +2,10 @@ /* * GCC stack protector support. * - * Stack protector works by putting predefined pattern at the start of + * Stack protector works by putting a predefined pattern at the start of * the stack frame and verifying that it hasn't been overwritten when - * returning from the function. The pattern is called stack canary - * and unfortunately gcc historically required it to be at a fixed offset - * from the percpu segment base. On x86_64, the offset is 40 bytes. - * - * The same segment is shared by percpu area and stack canary. On - * x86_64, percpu symbols are zero based and %gs (64-bit) points to the - * base of percpu area. The first occupant of the percpu area is always - * fixed_percpu_data which contains stack_canary at the appropriate - * offset. On x86_32, the stack canary is just a regular percpu - * variable. - * - * Putting percpu data in %fs on 32-bit is a minor optimization compared to - * using %gs. Since 32-bit userspace normally has %fs == 0, we are likely - * to load 0 into %fs on exit to usermode, whereas with percpu data in - * %gs, we are likely to load a non-null %gs on return to user mode. - * - * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector - * support, we can remove some of this complexity. + * returning from the function. The pattern is called the stack canary + * and is a unique value for each task. */ #ifndef _ASM_STACKPROTECTOR_H @@ -36,6 +20,8 @@ #include +DECLARE_PER_CPU(unsigned long, __stack_chk_guard); + /* * Initialize the stackprotector canary value. 
* @@ -51,25 +37,13 @@ static __always_inline void boot_init_stack_canary(void) { unsigned long canary = get_random_canary(); -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); -#endif - current->stack_canary = canary; -#ifdef CONFIG_X86_64 - this_cpu_write(fixed_percpu_data.stack_canary, canary); -#else this_cpu_write(__stack_chk_guard, canary); -#endif } static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) { -#ifdef CONFIG_X86_64 - per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary; -#else per_cpu(__stack_chk_guard, cpu) = idle->stack_canary; -#endif } #else /* STACKPROTECTOR */ diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index bb65371ea9df..590b6cd0eac0 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -54,11 +54,5 @@ int main(void) BLANK(); #undef ENTRY - BLANK(); - -#ifdef CONFIG_STACKPROTECTOR - OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary); - BLANK(); -#endif return 0; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7cce91b19fb2..b71178f0ed6c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2089,8 +2089,7 @@ void syscall_init(void) if (!cpu_feature_enabled(X86_FEATURE_FRED)) idt_syscall_init(); } - -#else /* CONFIG_X86_64 */ +#endif /* CONFIG_X86_64 */ #ifdef CONFIG_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, __stack_chk_guard); @@ -2099,8 +2098,6 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -#endif /* CONFIG_X86_64 */ - /* * Clear all 6 debug registers: */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 31345e0ba006..c3d73c04603f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -361,8 +361,7 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL) /* Set up %gs. * - * The base of %gs always points to fixed_percpu_data. If the - * stack protector canary is enabled, it is located at %gs:40. + * The base of %gs always points to fixed_percpu_data. * Note that, on SMP, the boot cpu uses init data section until * the per cpu areas are set up. */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 894edf8d6d62..a31b057e641e 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -33,8 +33,7 @@ SYM_CODE_START(startup_xen) /* Set up %gs. * - * The base of %gs always points to fixed_percpu_data. If the - * stack protector canary is enabled, it is located at %gs:40. + * The base of %gs always points to fixed_percpu_data. * Note that, on SMP, the boot cpu uses init data section until * the per cpu areas are set up. */ -- 2.51.0 From 9d7de2aa8b41407bc96d89a80dc1fd637d389d42 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:40 -0500 Subject: [PATCH 09/16] x86/percpu/64: Use relative percpu offsets The percpu section is currently linked at absolute address 0, because older compilers hard-coded the stack protector canary value at a fixed offset from the start of the GS segment. Now that the canary is a normal percpu variable, the percpu section does not need to be linked at a specific address. x86-64 will now calculate the percpu offsets as the delta between the initial percpu address and the dynamically allocated memory, like other architectures. Note that GSBASE is limited to the canonical address width (48 or 57 bits, sign-extended). 
As long as the kernel text, modules, and the dynamically allocated percpu memory are all in the negative address space, the delta will not overflow this limit. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Reviewed-by: Uros Bizjak Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-9-brgerst@gmail.com --- arch/x86/include/asm/processor.h | 6 +++++- arch/x86/kernel/head_64.S | 19 +++++++++---------- arch/x86/kernel/setup_percpu.c | 12 ++---------- arch/x86/kernel/vmlinux.lds.S | 29 +---------------------------- arch/x86/platform/pvh/head.S | 5 ++--- arch/x86/tools/relocs.c | 10 +++------- arch/x86/xen/xen-head.S | 9 ++++----- init/Kconfig | 2 +- 8 files changed, 27 insertions(+), 65 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a4687122951f..b8fee88dac3d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -431,7 +431,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data); static inline unsigned long cpu_kernelmode_gs_base(int cpu) { - return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); +#ifdef CONFIG_SMP + return per_cpu_offset(cpu); +#else + return 0; +#endif } extern asmlinkage void entry_SYSCALL32_ignore(void); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c3d73c04603f..2843b0a56198 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64) /* Set up the stack for verify_cpu() */ leaq __top_init_kernel_stack(%rip), %rsp - /* Setup GSBASE to allow stack canary access for C code */ + /* + * Set up GSBASE. + * Note that on SMP the boot CPU uses the init data section until + * the per-CPU areas are set up. + */ movl $MSR_GS_BASE, %ecx - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx - movl %edx, %eax - shrq $32, %rdx + xorl %eax, %eax + xorl %edx, %edx wrmsr call startup_64_setup_gdt_idt @@ -359,16 +362,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL) movl %eax,%fs movl %eax,%gs - /* Set up %gs. - * - * The base of %gs always points to fixed_percpu_data. + /* + * Set up GSBASE. * Note that, on SMP, the boot cpu uses init data section until * the per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx -#ifndef CONFIG_SMP - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx -#endif movl %edx, %eax shrq $32, %rdx wrmsr diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b30d6e180df7..1e7be9409aa2 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -23,18 +23,10 @@ #include #include -#ifdef CONFIG_X86_64 -#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) -#else -#define BOOT_PERCPU_OFFSET 0 -#endif - -DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); -unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = { - [0 ... 
NR_CPUS-1] = BOOT_PERCPU_OFFSET, -}; +unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init; EXPORT_SYMBOL(__per_cpu_offset); /* diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0deb4887d6e9..8a598515239a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -112,12 +112,6 @@ ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(6); /* RW_ */ -#ifdef CONFIG_X86_64 -#ifdef CONFIG_SMP - percpu PT_LOAD FLAGS(6); /* RW_ */ -#endif - init PT_LOAD FLAGS(7); /* RWE */ -#endif note PT_NOTE FLAGS(0); /* ___ */ } @@ -216,21 +210,7 @@ SECTIONS __init_begin = .; /* paired with __init_end */ } -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - .init.text - should - * start another segment - init. - */ - PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) - ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START, - "per-CPU data too large - increase CONFIG_PHYSICAL_START") -#endif - INIT_TEXT_SECTION(PAGE_SIZE) -#ifdef CONFIG_X86_64 - :init -#endif /* * Section for code used exclusively before alternatives are run. All @@ -347,9 +327,7 @@ SECTIONS EXIT_DATA } -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) PERCPU_SECTION(INTERNODE_CACHE_BYTES) -#endif RUNTIME_CONST_VARIABLES RUNTIME_CONST(ptr, USER_PTR_MAX) @@ -497,16 +475,11 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); * Per-cpu symbols which need to be offset from __per_cpu_load * for the boot processor. */ -#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load +#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) INIT_PER_CPU(gdt_page); INIT_PER_CPU(fixed_percpu_data); INIT_PER_CPU(irq_stack_backing_store); -#ifdef CONFIG_SMP -. = ASSERT((fixed_percpu_data == 0), - "fixed_percpu_data is not at start of per-cpu area"); -#endif - #ifdef CONFIG_MITIGATION_UNRET_ENTRY . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); #endif diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 723f181b222a..cfa18ec7d55f 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -179,9 +179,8 @@ SYM_CODE_START(pvh_start_xen) * the per-CPU areas are set up. 
 */
 	movl $MSR_GS_BASE,%ecx
-	leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-	movl %edx, %eax
-	shrq $32, %rdx
+	xorl %eax, %eax
+	xorl %edx, %edx
 	wrmsr
 
 	/* Call xen_prepare_pvh() via the kernel virtual mapping */
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 92a1e503305e..3cb3b30b6706 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -835,12 +835,7 @@ static void percpu_init(void)
  */
 static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
 {
-	int shndx = sym_index(sym);
-
-	return (shndx == per_cpu_shndx) &&
-		strcmp(symname, "__init_begin") &&
-		strcmp(symname, "__per_cpu_load") &&
-		strncmp(symname, "init_per_cpu_", 13);
+	return 0;
 }
 
 
@@ -1062,7 +1057,8 @@ static int cmp_relocs(const void *va, const void *vb)
 
 static void sort_relocs(struct relocs *r)
 {
-	qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
+	if (r->count)
+		qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
 }
 
 static int write32(uint32_t v, FILE *f)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index a31b057e641e..5ccb4c54e241 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -31,15 +31,14 @@ SYM_CODE_START(startup_xen)
 
 	leaq __top_init_kernel_stack(%rip), %rsp
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data.
+	/*
+	 * Set up GSBASE.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl $MSR_GS_BASE,%ecx
-	movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
-	cdq
+	xorl %eax, %eax
+	xorl %edx, %edx
 	wrmsr
 
 	mov %rsi, %rdi
diff --git a/init/Kconfig b/init/Kconfig
index d0d021b3fa3b..b5d9c0fa69f6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1872,7 +1872,7 @@ config KALLSYMS_ALL
 config KALLSYMS_ABSOLUTE_PERCPU
 	bool
 	depends on KALLSYMS
-	default X86_64 && SMP
+	default n
 
 # end of the "standard kernel features (expert users)" menu
 
-- 
2.51.0

From b5c4f95351a097a635c1a7fc8d9efa18308491b5 Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Thu, 23 Jan 2025 14:07:41 -0500
Subject: [PATCH 10/16] x86/percpu/64: Remove fixed_percpu_data

Now that the stack protector canary value is a normal percpu variable,
fixed_percpu_data is unused and can be removed.
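With fixed_percpu_data gone, a CPU's kernel-mode GSBASE is simply its
per-CPU offset (see cpu_kernelmode_gs_base() in the previous patch).
As a rough sketch of where that offset now comes from, using the
generic percpu allocator's symbols (pcpu_base_addr and
pcpu_unit_offsets[]), the setup code effectively does:

	unsigned long delta;
	unsigned int cpu;

	/* delta between the linked per-CPU image and its runtime copy */
	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;

	for_each_possible_cpu(cpu)
		per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];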
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Reviewed-by: Uros Bizjak Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-10-brgerst@gmail.com --- arch/x86/include/asm/processor.h | 8 -------- arch/x86/kernel/cpu/common.c | 4 ---- arch/x86/kernel/vmlinux.lds.S | 1 - arch/x86/tools/relocs.c | 1 - 4 files changed, 14 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b8fee88dac3d..b3d153730f63 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -421,14 +421,6 @@ struct irq_stack { } __aligned(IRQ_STACK_SIZE); #ifdef CONFIG_X86_64 -struct fixed_percpu_data { - char gs_base[40]; - unsigned long reserved; -}; - -DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; -DECLARE_INIT_PER_CPU(fixed_percpu_data); - static inline unsigned long cpu_kernelmode_gs_base(int cpu) { #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b71178f0ed6c..8b49b1338f76 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2023,10 +2023,6 @@ EXPORT_PER_CPU_SYMBOL(pcpu_hot); EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); #ifdef CONFIG_X86_64 -DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, - fixed_percpu_data) __aligned(PAGE_SIZE) __visible; -EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); - static void wrmsrl_cstar(unsigned long val) { /* diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8a598515239a..93c2fa8a7522 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -477,7 +477,6 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); */ #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) INIT_PER_CPU(gdt_page); -INIT_PER_CPU(fixed_percpu_data); INIT_PER_CPU(irq_stack_backing_store); #ifdef CONFIG_MITIGATION_UNRET_ENTRY diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 3cb3b30b6706..b5e3695a0615 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -830,7 +830,6 @@ static void percpu_init(void) * __per_cpu_load * * The "gold" linker incorrectly associates: - * init_per_cpu__fixed_percpu_data * init_per_cpu__gdt_page */ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) -- 2.51.0 From a8327be7b2aa067ff2b11551732d5bd8b49ef7d1 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:42 -0500 Subject: [PATCH 11/16] x86/boot/64: Remove inverse relocations Inverse relocations were needed to offset the effects of relocation for RIP-relative accesses to zero-based percpu data. Now that the percpu section is linked normally as part of the kernel image, they are no longer needed. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-11-brgerst@gmail.com --- arch/x86/boot/compressed/misc.c | 14 +--- arch/x86/tools/relocs.c | 130 +------------------------------- 2 files changed, 2 insertions(+), 142 deletions(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 0d37420cad02..1cdcd4aaf395 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -235,7 +235,7 @@ static void handle_relocations(void *output, unsigned long output_len, /* * Process relocations: 32 bit relocations first then 64 bit after. 
- * Three sets of binary relocations are added to the end of the kernel + * Two sets of binary relocations are added to the end of the kernel * before compression. Each relocation table entry is the kernel * address of the location which needs to be updated stored as a * 32-bit value which is sign extended to 64 bits. @@ -245,8 +245,6 @@ static void handle_relocations(void *output, unsigned long output_len, * kernel bits... * 0 - zero terminator for 64 bit relocations * 64 bit relocation repeated - * 0 - zero terminator for inverse 32 bit relocations - * 32 bit inverse relocation repeated * 0 - zero terminator for 32 bit relocations * 32 bit relocation repeated * @@ -263,16 +261,6 @@ static void handle_relocations(void *output, unsigned long output_len, *(uint32_t *)ptr += delta; } #ifdef CONFIG_X86_64 - while (*--reloc) { - long extended = *reloc; - extended += map; - - ptr = (unsigned long)extended; - if (ptr < min_addr || ptr > max_addr) - error("inverse 32-bit relocation outside of kernel!\n"); - - *(int32_t *)ptr -= delta; - } for (reloc--; *reloc; reloc--) { long extended = *reloc; extended += map; diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index b5e3695a0615..ae6962665b35 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -29,7 +29,6 @@ static struct relocs relocs16; static struct relocs relocs32; #if ELF_BITS == 64 -static struct relocs relocs32neg; static struct relocs relocs64; # define FMT PRIu64 @@ -91,7 +90,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "__initramfs_start|" "(jiffies|jiffies_64)|" #if ELF_BITS == 64 - "__per_cpu_load|" "init_per_cpu__.*|" "__end_rodata_hpage_align|" #endif @@ -290,34 +288,6 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) return name; } -static Elf_Sym *sym_lookup(const char *symname) -{ - int i; - - for (i = 0; i < shnum; i++) { - struct section *sec = &secs[i]; - long nsyms; - char *strtab; - Elf_Sym *symtab; - Elf_Sym *sym; - - if (sec->shdr.sh_type != SHT_SYMTAB) - continue; - - nsyms = sec->shdr.sh_size/sizeof(Elf_Sym); - symtab = sec->symtab; - strtab = sec->link->strtab; - - for (sym = symtab; --nsyms >= 0; sym++) { - if (!sym->st_name) - continue; - if (strcmp(symname, strtab + sym->st_name) == 0) - return sym; - } - } - return 0; -} - #if BYTE_ORDER == LITTLE_ENDIAN # define le16_to_cpu(val) (val) # define le32_to_cpu(val) (val) @@ -766,78 +736,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, } } -/* - * The .data..percpu section is a special case for x86_64 SMP kernels. - * It is used to initialize the actual per_cpu areas and to provide - * definitions for the per_cpu variables that correspond to their offsets - * within the percpu area. Since the values of all of the symbols need - * to be offsets from the start of the per_cpu area the virtual address - * (sh_addr) of .data..percpu is 0 in SMP kernels. - * - * This means that: - * - * Relocations that reference symbols in the per_cpu area do not - * need further relocation (since the value is an offset relative - * to the start of the per_cpu area that does not change). - * - * Relocations that apply to the per_cpu area need to have their - * offset adjusted by by the value of __per_cpu_load to make them - * point to the correct place in the loaded image (because the - * virtual address of .data..percpu is 0). - * - * For non SMP kernels .data..percpu is linked as part of the normal - * kernel data and does not require special treatment. 
- * - */ -static int per_cpu_shndx = -1; -static Elf_Addr per_cpu_load_addr; - -static void percpu_init(void) -{ - int i; - - for (i = 0; i < shnum; i++) { - ElfW(Sym) *sym; - - if (strcmp(sec_name(i), ".data..percpu")) - continue; - - if (secs[i].shdr.sh_addr != 0) /* non SMP kernel */ - return; - - sym = sym_lookup("__per_cpu_load"); - if (!sym) - die("can't find __per_cpu_load\n"); - - per_cpu_shndx = i; - per_cpu_load_addr = sym->st_value; - - return; - } -} - #if ELF_BITS == 64 -/* - * Check to see if a symbol lies in the .data..percpu section. - * - * The linker incorrectly associates some symbols with the - * .data..percpu section so we also need to check the symbol - * name to make sure that we classify the symbol correctly. - * - * The GNU linker incorrectly associates: - * __init_begin - * __per_cpu_load - * - * The "gold" linker incorrectly associates: - * init_per_cpu__gdt_page - */ -static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) -{ - return 0; -} - - static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, const char *symname) { @@ -848,12 +748,6 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, if (sym->st_shndx == SHN_UNDEF) return 0; - /* - * Adjust the offset if this reloc applies to the percpu section. - */ - if (sec->shdr.sh_info == per_cpu_shndx) - offset += per_cpu_load_addr; - switch (r_type) { case R_X86_64_NONE: /* NONE can be ignored. */ @@ -863,32 +757,21 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, case R_X86_64_PLT32: case R_X86_64_REX_GOTPCRELX: /* - * PC relative relocations don't need to be adjusted unless - * referencing a percpu symbol. + * PC relative relocations don't need to be adjusted. * * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ - if (is_percpu_sym(sym, symname)) - add_reloc(&relocs32neg, offset); break; case R_X86_64_PC64: /* * Only used by jump labels */ - if (is_percpu_sym(sym, symname)) - die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n", symname); break; case R_X86_64_32: case R_X86_64_32S: case R_X86_64_64: - /* - * References to the percpu area don't need to be adjusted. - */ - if (is_percpu_sym(sym, symname)) - break; - if (shn_abs) { /* * Whitelisted absolute symbols do not require @@ -1101,7 +984,6 @@ static void emit_relocs(int as_text, int use_real_mode) /* Order the relocations for more efficient processing */ sort_relocs(&relocs32); #if ELF_BITS == 64 - sort_relocs(&relocs32neg); sort_relocs(&relocs64); #else sort_relocs(&relocs16); @@ -1133,13 +1015,6 @@ static void emit_relocs(int as_text, int use_real_mode) /* Now print each relocation */ for (i = 0; i < relocs64.count; i++) write_reloc(relocs64.offset[i], stdout); - - /* Print a stop */ - write_reloc(0, stdout); - - /* Now print each inverse 32-bit relocation */ - for (i = 0; i < relocs32neg.count; i++) - write_reloc(relocs32neg.offset[i], stdout); #endif /* Print a stop */ @@ -1192,9 +1067,6 @@ void process(FILE *fp, int use_real_mode, int as_text, read_symtabs(fp); read_relocs(fp); - if (ELF_BITS == 64) - percpu_init(); - if (show_absolute_syms) { print_absolute_symbols(); return; -- 2.51.0 From 38a4968b3190f873a8a60e953287278eddf037f1 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:43 -0500 Subject: [PATCH 12/16] x86/percpu/64: Remove INIT_PER_CPU macros Now that the load and link addresses of percpu variables are the same, these macros are no longer necessary. 
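As a before/after sketch (the head64.c hunk below is the real
instance; the gdt_old/gdt_new names are illustrative only):

	/* Before: the boot CPU's copy lived at the symbol's link address
	 * plus __per_cpu_load, reachable via the generated alias: */
	struct desc_struct *gdt_old = (void *)init_per_cpu_var(gdt_page.gdt);

	/* After: load and link addresses are equal, so the plain per-CPU
	 * symbol already names the boot CPU's copy: */
	struct desc_struct *gdt_new = (void *)gdt_page.gdt;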
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Reviewed-by: Uros Bizjak Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-12-brgerst@gmail.com --- arch/x86/include/asm/desc.h | 1 - arch/x86/include/asm/percpu.h | 22 ---------------------- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/irq_64.c | 1 - arch/x86/kernel/vmlinux.lds.S | 7 ------- arch/x86/tools/relocs.c | 1 - 6 files changed, 1 insertion(+), 33 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 62dc9f59ea76..ec95fe44fa3a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -46,7 +46,6 @@ struct gdt_page { } __attribute__((aligned(PAGE_SIZE))); DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); -DECLARE_INIT_PER_CPU(gdt_page); /* Provide the original GDT */ static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e525cd85f999..1a76eb87c5d8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -20,12 +20,6 @@ #define PER_CPU_VAR(var) __percpu(var)__percpu_rel -#ifdef CONFIG_X86_64_SMP -# define INIT_PER_CPU_VAR(var) init_per_cpu__##var -#else -# define INIT_PER_CPU_VAR(var) var -#endif - #else /* !__ASSEMBLY__: */ #include @@ -97,22 +91,6 @@ #define __percpu_arg(x) __percpu_prefix "%" #x #define __force_percpu_arg(x) __force_percpu_prefix "%" #x -/* - * Initialized pointers to per-CPU variables needed for the boot - * processor need to use these macros to get the proper address - * offset from __per_cpu_load on SMP. - * - * There also must be an entry in vmlinux_64.lds.S - */ -#define DECLARE_INIT_PER_CPU(var) \ - extern typeof(var) init_per_cpu_var(var) - -#ifdef CONFIG_X86_64_SMP -# define init_per_cpu_var(var) init_per_cpu__##var -#else -# define init_per_cpu_var(var) var -#endif - /* * For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 22c9ba305ac1..05f8b8acf784 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -567,7 +567,7 @@ void early_setup_idt(void) */ void __head startup_64_setup_gdt_idt(void) { - struct desc_struct *gdt = (void *)(__force unsigned long)init_per_cpu_var(gdt_page.gdt); + struct desc_struct *gdt = (void *)(__force unsigned long)gdt_page.gdt; void *handler = NULL; struct desc_ptr startup_gdt_descr = { diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index ade0043ce56e..56bdeecd8ee0 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -27,7 +27,6 @@ #include DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; -DECLARE_INIT_PER_CPU(irq_stack_backing_store); #ifdef CONFIG_VMAP_STACK /* diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 93c2fa8a7522..1769a7126224 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -471,13 +471,6 @@ SECTIONS PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); #ifdef CONFIG_X86_64 -/* - * Per-cpu symbols which need to be offset from __per_cpu_load - * for the boot processor. - */ -#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) -INIT_PER_CPU(gdt_page); -INIT_PER_CPU(irq_stack_backing_store); #ifdef CONFIG_MITIGATION_UNRET_ENTRY . 
= ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index ae6962665b35..5778bc498415 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -90,7 +90,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "__initramfs_start|" "(jiffies|jiffies_64)|" #if ELF_BITS == 64 - "init_per_cpu__.*|" "__end_rodata_hpage_align|" #endif "_end)$" -- 2.51.0 From 95b0916118106054e1f3d5d7f8628ef3dc0b3c02 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:44 -0500 Subject: [PATCH 13/16] percpu: Remove PER_CPU_FIRST_SECTION x86-64 was the last user. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-13-brgerst@gmail.com --- include/asm-generic/vmlinux.lds.h | 1 - include/linux/percpu-defs.h | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 02a4adb4a999..a3c77a106565 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1062,7 +1062,6 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) */ #define PERCPU_INPUT(cacheline) \ __per_cpu_start = .; \ - *(.data..percpu..first) \ . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . = ALIGN(cacheline); \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5b520fe86b60..40d34e032d5b 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -26,13 +26,11 @@ #define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" #endif -#define PER_CPU_FIRST_SECTION "..first" #else #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_FIRST_SECTION "" #endif @@ -114,16 +112,6 @@ #define DEFINE_PER_CPU(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "") -/* - * Declaration/definition used for per-CPU variables that must come first in - * the set of variables. - */ -#define DECLARE_PER_CPU_FIRST(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) - -#define DEFINE_PER_CPU_FIRST(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) - /* * Declaration/definition used for per-CPU variables that must be cacheline * aligned under SMP conditions so that, whilst a particular instance of the -- 2.51.0 From e23cff6861781ac4e15de6c7bf2d2a0b79cb52ef Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:45 -0500 Subject: [PATCH 14/16] percpu: Remove PERCPU_VADDR() x86-64 was the last user. 
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-14-brgerst@gmail.com --- include/asm-generic/vmlinux.lds.h | 36 +------------------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a3c77a106565..e25a8aeee29c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1073,47 +1073,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) __per_cpu_end = .; /** - * PERCPU_VADDR - define output section for percpu area + * PERCPU_SECTION - define output section for percpu area * @cacheline: cacheline size - * @vaddr: explicit base address (optional) - * @phdr: destination PHDR (optional) * * Macro which expands to output section for percpu area. * * @cacheline is used to align subsections to avoid false cacheline * sharing between subsections for different purposes. - * - * If @vaddr is not blank, it specifies explicit base address and all - * percpu symbols will be offset from the given address. If blank, - * @vaddr always equals @laddr + LOAD_OFFSET. - * - * @phdr defines the output PHDR to use if not blank. Be warned that - * output PHDR is sticky. If @phdr is specified, the next output - * section in the linker script will go there too. @phdr should have - * a leading colon. - * - * Note that this macros defines __per_cpu_load as an absolute symbol. - * If there is no need to put the percpu section at a predetermined - * address, use PERCPU_SECTION. - */ -#define PERCPU_VADDR(cacheline, vaddr, phdr) \ - __per_cpu_load = .; \ - .data..percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ - PERCPU_INPUT(cacheline) \ - } phdr \ - . = __per_cpu_load + SIZEOF(.data..percpu); - -/** - * PERCPU_SECTION - define output section for percpu area, simple version - * @cacheline: cacheline size - * - * Align to PAGE_SIZE and outputs output section for percpu area. This - * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and - * __per_cpu_start will be identical. - * - * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,) - * except that __per_cpu_load is defined as a relative symbol against - * .data..percpu which is required for relocatable x86_32 configuration. */ #define PERCPU_SECTION(cacheline) \ . = ALIGN(PAGE_SIZE); \ -- 2.51.0 From 4b00c1160a13d8bf7297ebf49ec07a84e1f41132 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:46 -0500 Subject: [PATCH 15/16] percpu: Remove __per_cpu_load __per_cpu_load is now always equal to __per_cpu_start. 
Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-15-brgerst@gmail.com --- include/asm-generic/sections.h | 2 +- include/asm-generic/vmlinux.lds.h | 1 - mm/percpu.c | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index c768de6f19a9..0755bc39b0d8 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -39,7 +39,7 @@ extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char __start_ro_after_init[], __end_ro_after_init[]; extern char _end[]; -extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; +extern char __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e25a8aeee29c..92fc06f7da74 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1084,7 +1084,6 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define PERCPU_SECTION(cacheline) \ . = ALIGN(PAGE_SIZE); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ - __per_cpu_load = .; \ PERCPU_INPUT(cacheline) \ } diff --git a/mm/percpu.c b/mm/percpu.c index ac61e3fc5f15..7b5835356d1e 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -3071,7 +3071,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, continue; } /* copy and return the unused part */ - memcpy(ptr, __per_cpu_load, ai->static_size); + memcpy(ptr, __per_cpu_start, ai->static_size); pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum); } } @@ -3240,7 +3240,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size); /* copy static data */ - memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); + memcpy((void *)unit_addr, __per_cpu_start, ai->static_size); } /* we're ready, commit */ -- 2.51.0 From 01157ddc58dc2fe428ec17dd5a18cc13f134639f Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 23 Jan 2025 14:07:47 -0500 Subject: [PATCH 16/16] kallsyms: Remove KALLSYMS_ABSOLUTE_PERCPU x86-64 was the only user. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Reviewed-by: Ard Biesheuvel Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250123190747.745588-16-brgerst@gmail.com --- init/Kconfig | 5 --- kernel/kallsyms.c | 12 ++----- scripts/kallsyms.c | 72 +++++++---------------------------------- scripts/link-vmlinux.sh | 4 --- 4 files changed, 14 insertions(+), 79 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index b5d9c0fa69f6..a0ea04c17784 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1869,11 +1869,6 @@ config KALLSYMS_ALL Say N unless you really need all symbols, or kernel live patching. 
-config KALLSYMS_ABSOLUTE_PERCPU - bool - depends on KALLSYMS - default n - # end of the "standard kernel features (expert users)" menu config ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index a9a0ca605d4a..4198f30aac3c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -148,16 +148,8 @@ static unsigned int get_symbol_offset(unsigned long pos) unsigned long kallsyms_sym_address(int idx) { - /* values are unsigned offsets if --absolute-percpu is not in effect */ - if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU)) - return kallsyms_relative_base + (u32)kallsyms_offsets[idx]; - - /* ...otherwise, positive offsets are absolute values */ - if (kallsyms_offsets[idx] >= 0) - return kallsyms_offsets[idx]; - - /* ...and negative offsets are relative to kallsyms_relative_base - 1 */ - return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; + /* values are unsigned offsets */ + return kallsyms_relative_base + (u32)kallsyms_offsets[idx]; } static unsigned int get_symbol_seq(int index) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 03852da3d249..4b0234e4b12f 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,7 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S + * Usage: kallsyms [--all-symbols] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -37,7 +37,6 @@ struct sym_entry { unsigned long long addr; unsigned int len; unsigned int seq; - bool percpu_absolute; unsigned char sym[]; }; @@ -55,14 +54,9 @@ static struct addr_range text_ranges[] = { #define text_range_text (&text_ranges[0]) #define text_range_inittext (&text_ranges[1]) -static struct addr_range percpu_range = { - "__per_cpu_start", "__per_cpu_end", -1ULL, 0 -}; - static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; -static int absolute_percpu; static int token_profit[0x10000]; @@ -73,7 +67,7 @@ static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] in.map > out.S\n"); exit(1); } @@ -164,7 +158,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) return NULL; check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); - check_symbol_range(name, addr, &percpu_range, 1); /* include the type field in the symbol name, so that it gets * compressed together */ @@ -175,7 +168,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) sym->len = len; sym->sym[0] = type; strcpy(sym_name(sym), name); - sym->percpu_absolute = false; return sym; } @@ -319,11 +311,6 @@ static int expand_symbol(const unsigned char *data, int len, char *result) return total; } -static bool symbol_absolute(const struct sym_entry *s) -{ - return s->percpu_absolute; -} - static int compare_names(const void *a, const void *b) { int ret; @@ -455,22 +442,11 @@ static void write_src(void) */ long long offset; - bool overflow; - - if (!absolute_percpu) { - offset = table[i]->addr - relative_base; - overflow = offset < 0 || offset > UINT_MAX; - } else if (symbol_absolute(table[i])) { - offset = table[i]->addr; - overflow = offset < 0 || offset > INT_MAX; - } else { - offset = 
relative_base - table[i]->addr - 1; - overflow = offset < INT_MIN || offset >= 0; - } - if (overflow) { + + offset = table[i]->addr - relative_base; + if (offset < 0 || offset > UINT_MAX) { fprintf(stderr, "kallsyms failure: " - "%s symbol value %#llx out of range in relative mode\n", - symbol_absolute(table[i]) ? "absolute" : "relative", + "relative symbol value %#llx out of range\n", table[i]->addr); exit(EXIT_FAILURE); } @@ -725,36 +701,15 @@ static void sort_symbols(void) qsort(table, table_cnt, sizeof(table[0]), compare_symbols); } -static void make_percpus_absolute(void) -{ - unsigned int i; - - for (i = 0; i < table_cnt; i++) - if (symbol_in_range(table[i], &percpu_range, 1)) { - /* - * Keep the 'A' override for percpu symbols to - * ensure consistent behavior compared to older - * versions of this tool. - */ - table[i]->sym[0] = 'A'; - table[i]->percpu_absolute = true; - } -} - /* find the minimum non-absolute symbol address */ static void record_relative_base(void) { - unsigned int i; - - for (i = 0; i < table_cnt; i++) - if (!symbol_absolute(table[i])) { - /* - * The table is sorted by address. - * Take the first non-absolute symbol value. - */ - relative_base = table[i]->addr; - return; - } + /* + * The table is sorted by address. + * Take the first symbol value. + */ + if (table_cnt) + relative_base = table[0]->addr; } int main(int argc, char **argv) @@ -762,7 +717,6 @@ int main(int argc, char **argv) while (1) { static const struct option long_options[] = { {"all-symbols", no_argument, &all_symbols, 1}, - {"absolute-percpu", no_argument, &absolute_percpu, 1}, {}, }; @@ -779,8 +733,6 @@ int main(int argc, char **argv) read_map(argv[optind]); shrink_table(); - if (absolute_percpu) - make_percpus_absolute(); sort_symbols(); record_relative_base(); optimize_token_table(); diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 56a077d204cf..67e66333bd2a 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -144,10 +144,6 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi - if is_enabled CONFIG_KALLSYMS_ABSOLUTE_PERCPU; then - kallsymopt="${kallsymopt} --absolute-percpu" - fi - info KSYMS "${2}.S" scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S" -- 2.51.0