From 4facb95b7adaf77e2da73aafb9ba60996fe42a12 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Sep 2020 01:50:54 +0200 Subject: [PATCH] x86/entry: Unbreak 32bit fast syscall Andy reported that the syscall treacing for 32bit fast syscall fails: # ./tools/testing/selftests/x86/ptrace_syscall_32 ... [RUN] SYSEMU [FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732) ... [RUN] SYSCALL [FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732) The eason is that the conversion to generic entry code moved the retrieval of the sixth argument (EBP) after the point where the syscall entry work runs, i.e. ptrace, seccomp, audit... Unbreak it by providing a split up version of syscall_enter_from_user_mode(). - syscall_enter_from_user_mode_prepare() establishes state and enables interrupts - syscall_enter_from_user_mode_work() runs the entry work Replace the call to syscall_enter_from_user_mode() in the 32bit fast syscall C-entry with the split functions and stick the EBP retrieval between them. Fixes: 27d6b4d14f5c ("x86/entry: Use generic syscall entry function") Reported-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87k0xdjbtt.fsf@nanos.tec.linutronix.de --- arch/x86/entry/common.c | 29 +++++++++++++------- include/linux/entry-common.h | 51 +++++++++++++++++++++++++++++------- kernel/entry/common.c | 35 ++++++++++++++++++++----- 3 files changed, 91 insertions(+), 24 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 48512c7944e7e..2f84c7ca74ea9 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) { - unsigned int nr = (unsigned int)regs->orig_ax; - if (IS_ENABLED(CONFIG_IA32_EMULATION)) current_thread_info()->status |= TS_COMPAT; - /* - * Subtlety here: if ptrace pokes something larger than 2^32-1 into - * orig_ax, the unsigned int return value truncates it. This may - * or may not be necessary, but it matches the old asm behavior. - */ - return (unsigned int)syscall_enter_from_user_mode(regs, nr); + + return (unsigned int)regs->orig_ax; } /* @@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { unsigned int nr = syscall_32_enter(regs); + /* + * Subtlety here: if ptrace pokes something larger than 2^32-1 into + * orig_ax, the unsigned int return value truncates it. This may + * or may not be necessary, but it matches the old asm behavior. + */ + nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); } static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { - unsigned int nr = syscall_32_enter(regs); + unsigned int nr = syscall_32_enter(regs); int res; + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + instrumentation_begin(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { @@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return false; } + /* The case truncates any ptrace induced syscall nr > 2^32 -1 */ + nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr); + /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc9..159c7476b11b4 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #endif /** - * syscall_enter_from_user_mode - Check and handle work before invoking - * a syscall + * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs - * @syscall: The syscall number * * Invoked from architecture specific syscall entry code with interrupts * disabled. The calling code has to be non-instrumentable. When the - * function returns all state is correct and the subsequent functions can be - * instrumented. + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This handles lockdep, RCU (context tracking) and tracing state. + * + * This is invoked when there is extra architecture specific functionality + * to be done between establishing state and handling user mode entry work. + */ +void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); + +/** + * syscall_enter_from_user_mode_work - Check and handle work before invoking + * a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * enabled after invoking syscall_enter_from_user_mode_prepare() and extra + * architecture specific work. * * Returns: The original or a modified syscall number * @@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * syscall_set_return_value() first. If neither of those are called and -1 * is returned, then the syscall will fail with ENOSYS. * - * The following functionality is handled here: + * It handles the following work items: * - * 1) Establish state (lockdep, RCU (context tracking), tracing) - * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(), + * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), * __secure_computing(), trace_sys_enter() - * 3) Invocation of audit_syscall_entry() + * 2) Invocation of audit_syscall_entry() + */ +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); + +/** + * syscall_enter_from_user_mode - Establish state and check and handle work + * before invoking a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This is combination of syscall_enter_from_user_mode_prepare() and + * syscall_enter_from_user_mode_work(). + * + * Returns: The original or a modified syscall number. See + * syscall_enter_from_user_mode_work() for further explanation. */ long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index fcae019158cad..18683598edbc6 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, return ret ? : syscall_get_nr(current, regs); } -noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +static __always_inline long +__syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { unsigned long ti_work; - enter_from_user_mode(regs); - instrumentation_begin(); - - local_irq_enable(); ti_work = READ_ONCE(current_thread_info()->flags); if (ti_work & SYSCALL_ENTER_WORK) syscall = syscall_trace_enter(regs, syscall, ti_work); - instrumentation_end(); return syscall; } +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + return __syscall_enter_from_user_work(regs, syscall); +} + +noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = __syscall_enter_from_user_work(regs, syscall); + instrumentation_end(); + + return ret; +} + +noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + instrumentation_begin(); + local_irq_enable(); + instrumentation_end(); +} + /** * exit_to_user_mode - Fixup state when exiting to user mode * -- 2.51.0