From 43ffeb1886504ee13b2d9b327dc1d228247078b4 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Mon, 31 Mar 2014 15:23:39 +0100 Subject: [PATCH] mm / dtrace: Allow DTrace to entirely disable page faults. In some very limited circumstances (largely, but not entirely, restricted to get_user() calls) DTrace needs to be able to probe userspace in irq and trap context, in which page faults are prohibited or otherwise impossible because we cannot guarantee that certain locks (like the mmap_sem) are not already taken by the time DTrace is invoked. In a similar fashion to the existing CPU_DTRACE_NOFAULT machinery, which allows DTrace to prohibit invalid address faults, we introduce a CPU_DTRACE_NOPF value in the per-CPU DTrace flag variable, which, when set by the DTrace module, causes all page faults to set CPU_DTRACE_PF_TRAPPED in the flag variable: the page fault is then ignored (and the current instruction skipped so that it is not immediately retriggered). Clearly, ignoring page faults in random pieces of kernel code would be very risky: this machinery is only used in situations such as get_user(), in which the kernel code does nothing with the page other than to extract data from it and pass it back to the DTrace module. The impact on the core x86 fault path when CONFIG_DTRACE is set is one unlikely() conditional and one function call: when CONFIG_DTRACE is not on, the impact is zero. (Eliminating the function call and directly testing the DTrace per-CPU flag is possible as a future optimization, but was considered too invasive for now.) 
Orabug: 18412802 Signed-off-by: Nick Alcock Reviewed-by: Kris Van Hees Reviewed-by: Chuck Anderson Conflicts: arch/x86/mm/fault.c --- arch/x86/mm/fault.c | 11 +++++++++++ include/linux/dtrace_cpu_defines.h | 4 +++- include/linux/dtrace_os.h | 27 ++++++++++++++++++++++++++- kernel/dtrace/dtrace_os.c | 21 ++++++++++++++++++++- 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0c214252685c..6b126ffae31e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -13,6 +13,7 @@ #include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ +#include <linux/dtrace_os.h> /* dtrace_no_pf */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ @@ -1117,6 +1118,10 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, return; } + /* + * From here on, we know the faulting address must be in userspace. + */ + /* kprobes don't want to hook the spurious faults: */ if (unlikely(kprobes_fault(regs))) return; @@ -1129,6 +1134,12 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, return; } + /* + * If DTrace has disabled page faults (CPU_DTRACE_NOPF), skip this one. 
+ */ + if (unlikely(dtrace_no_pf(regs))) + return; + /* * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: diff --git a/include/linux/dtrace_cpu_defines.h b/include/linux/dtrace_cpu_defines.h index a6709bee8cef..24c425af829e 100644 --- a/include/linux/dtrace_cpu_defines.h +++ b/include/linux/dtrace_cpu_defines.h @@ -31,12 +31,14 @@ #define CPU_DTRACE_TUPOFLOW 0x0200 #define CPU_DTRACE_ENTRY 0x0800 #define CPU_DTRACE_BADSTACK 0x1000 +#define CPU_DTRACE_NOPF 0x2000 +#define CPU_DTRACE_PF_TRAPPED 0x4000 #define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \ CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \ CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \ CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \ - CPU_DTRACE_BADSTACK) + CPU_DTRACE_BADSTACK | CPU_DTRACE_PF_TRAPPED) #define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP) typedef uint32_t processorid_t; diff --git a/include/linux/dtrace_os.h b/include/linux/dtrace_os.h index b2a92573b2cd..063051766b8c 100644 --- a/include/linux/dtrace_os.h +++ b/include/linux/dtrace_os.h @@ -7,8 +7,11 @@ typedef uint32_t dtrace_id_t; #ifndef HEADERS_CHECK +#ifdef CONFIG_DTRACE + #include #include +#include #define DTRACE_IDNONE 0 @@ -88,6 +91,18 @@ typedef struct stacktrace_state { } stacktrace_state_t; extern void dtrace_stacktrace(stacktrace_state_t *); +extern int dtrace_handle_no_pf(struct pt_regs *); + +/* + * This is only safe to call if we know this is a userspace fault + * or that the call happens after early boot. 
+ */ +static inline int dtrace_no_pf(struct pt_regs *regs) +{ + if (unlikely(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOPF))) + return dtrace_handle_no_pf(regs); + return 0; +} extern struct task_struct *register_pid_provider(pid_t); extern void unregister_pid_provider(pid_t); @@ -111,6 +126,16 @@ extern int (*dtrace_tracepoint_hit)(fasttrap_machtp_t *, struct pt_regs *); extern int dtrace_tracepoint_enable(pid_t, uintptr_t, fasttrap_machtp_t *); extern int dtrace_tracepoint_disable(pid_t, fasttrap_machtp_t *); -#endif +#else + +/* + * Without CONFIG_DTRACE this check is constant 0; see arch/x86/mm/fault.c. + */ + +#define dtrace_no_pf(ignore) 0 + +#endif /* CONFIG_DTRACE */ + +#endif /* !HEADERS_CHECK */ #endif /* _LINUX_DTRACE_OS_H_ */ diff --git a/kernel/dtrace/dtrace_os.c b/kernel/dtrace/dtrace_os.c index eac5ec0545b5..3404da093642 100644 --- a/kernel/dtrace/dtrace_os.c +++ b/kernel/dtrace/dtrace_os.c @@ -496,7 +496,7 @@ void dtrace_stacktrace(stacktrace_state_t *st) EXPORT_SYMBOL(dtrace_stacktrace); /*---------------------------------------------------------------------------*\ -(* INVALID OPCODE HANDLING *) +(* INVALID OPCODE AND PAGE FAULT HANDLING *) \*---------------------------------------------------------------------------*/ typedef struct dtrace_invop_hdlr { uint8_t (*dtih_func)(struct pt_regs *); @@ -610,6 +610,25 @@ void dtrace_disable(void) } EXPORT_SYMBOL(dtrace_disable); +/* + * Out-of-line body of dtrace_no_pf(), called only in the rare pf-disabled case + * (keeps icache pressure down): sets CPU_DTRACE_PF_TRAPPED, steps regs->ip + * past the instruction at regs->ip so it is not retried, and returns 1. + */ +int dtrace_handle_no_pf(struct pt_regs *regs) +{ + struct insn insn; + + DTRACE_CPUFLAG_SET(CPU_DTRACE_PF_TRAPPED); + + kernel_insn_init(&insn, (void *)regs->ip); + insn_get_length(&insn); + + regs->ip += insn.length; + + return 1; +} + int dtrace_invop_add(uint8_t (*func)(struct pt_regs *)) { dtrace_invop_hdlr_t *hdlr; -- 2.50.1