In some very limited circumstances (largely, but not entirely, restricted to
get_user() calls), DTrace needs to be able to probe userspace in irq and trap
context, where page faults must not be taken because we cannot guarantee that
certain locks (such as the mmap_sem) are not already held by the time DTrace is
invoked.
In a similar fashion to the existing CPU_DTRACE_NOFAULT machinery, which allows
DTrace to suppress invalid-address faults, we introduce a CPU_DTRACE_NOPF value
in the per-CPU DTrace flag variable. When set by the DTrace module, it causes
any page fault to set CPU_DTRACE_PF_TRAPPED in the same flag variable: the page
fault is then ignored, and the faulting instruction is skipped so that it is not
immediately retriggered.
Clearly, ignoring page faults in arbitrary kernel code would be very risky:
this machinery is only used in situations such as get_user(), where the kernel
code does nothing with the faulting page other than extract data from it and
pass it back to the DTrace module.
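As an illustration of the intended usage pattern, a probe-context userspace
read might look roughly like the sketch below. This is a minimal sketch, not
code from this patch: dtrace_fuword_sketch() is a hypothetical helper, a
DTRACE_CPUFLAG_CLEAR() counterpart to the DTRACE_CPUFLAG_SET() and
DTRACE_CPUFLAG_ISSET() helpers is assumed, and the use of CPU_DTRACE_BADADDR
as the resulting error flag is an assumption as well.

	/* Assumes <linux/uaccess.h> and <linux/dtrace_cpu.h>. */
	static uint64_t dtrace_fuword_sketch(const uint64_t __user *uaddr)
	{
		uint64_t val = 0;

		/* Suppress page faults around the userspace access. */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOPF);

		/*
		 * If the access faults, the fault handler sets
		 * CPU_DTRACE_PF_TRAPPED and skips the faulting
		 * instruction, so the flag (not get_user()'s return
		 * value) is the authoritative failure signal.
		 */
		if (get_user(val, uaddr) ||
		    DTRACE_CPUFLAG_ISSET(CPU_DTRACE_PF_TRAPPED)) {
			/* Assumed error flag for a bad userspace address. */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			val = 0;
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOPF | CPU_DTRACE_PF_TRAPPED);
		return val;
	}

Note that because the trapped instruction is skipped rather than fixed up via
the exception table, get_user() may appear to succeed with val left untouched;
that is why CPU_DTRACE_PF_TRAPPED must be checked explicitly.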
The impact on the core x86 fault path when CONFIG_DTRACE is set is one
unlikely() conditional and one function call; when CONFIG_DTRACE is off, the
impact is zero. (Eliminating the function call and testing the DTrace per-CPU
flag directly is possible as a future optimization, but was considered too
invasive for now.)
Orabug: 18412802
Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com>
Reviewed-by: Chuck Anderson <chuck.anderson@oracle.com>
Conflicts:
arch/x86/mm/fault.c
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
+#include <linux/dtrace_os.h> /* dtrace_no_pf */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
return;
}
+	/*
+	 * From here on, we know this must be a fault on a userspace address.
+	 */
+
/* kprobes don't want to hook the spurious faults: */
if (unlikely(kprobes_fault(regs)))
return;
return;
}
+	/*
+	 * Nor does DTrace when page faults are prohibited: the fault is
+	 * swallowed and the faulting instruction skipped.
+	 */
+ if (unlikely(dtrace_no_pf(regs)))
+ return;
+
/*
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
#define CPU_DTRACE_TUPOFLOW 0x0200
#define CPU_DTRACE_ENTRY 0x0800
#define CPU_DTRACE_BADSTACK 0x1000
+#define CPU_DTRACE_NOPF 0x2000
+#define CPU_DTRACE_PF_TRAPPED 0x4000
#define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \
CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \
CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \
CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \
- CPU_DTRACE_BADSTACK)
+ CPU_DTRACE_BADSTACK | CPU_DTRACE_PF_TRAPPED)
#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP)
typedef uint32_t processorid_t;
#ifndef HEADERS_CHECK
+#ifdef CONFIG_DTRACE
+
#include <linux/uprobes.h>
#include <asm/asm-offsets.h>
+#include <linux/dtrace_cpu.h>
#define DTRACE_IDNONE 0
} stacktrace_state_t;
extern void dtrace_stacktrace(stacktrace_state_t *);
+extern int dtrace_handle_no_pf(struct pt_regs *);
+
+/*
+ * This is only safe to call if we know this is a userspace fault, or
+ * once early boot has completed.
+ */
+static inline int dtrace_no_pf(struct pt_regs *regs)
+{
+ if (unlikely(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOPF)))
+ return dtrace_handle_no_pf(regs);
+ return 0;
+}
extern struct task_struct *register_pid_provider(pid_t);
extern void unregister_pid_provider(pid_t);
extern int dtrace_tracepoint_enable(pid_t, uintptr_t, fasttrap_machtp_t *);
extern int dtrace_tracepoint_disable(pid_t, fasttrap_machtp_t *);
-#endif
+#else
+
+/*
+ * See arch/x86/mm/fault.c.
+ */
+
+#define dtrace_no_pf(ignore) 0
+
+#endif /* CONFIG_DTRACE */
+
+#endif /* !HEADERS_CHECK */
#endif /* _LINUX_DTRACE_OS_H_ */
EXPORT_SYMBOL(dtrace_stacktrace);
/*---------------------------------------------------------------------------*\
-(* INVALID OPCODE HANDLING *)
+(* INVALID OPCODE AND PAGE FAULT HANDLING *)
\*---------------------------------------------------------------------------*/
typedef struct dtrace_invop_hdlr {
uint8_t (*dtih_func)(struct pt_regs *);
}
EXPORT_SYMBOL(dtrace_disable);
+/*
+ * The dtrace-is-active body of dtrace_no_pf(), split into a separate function
+ * to keep icache pressure down while incurring function call overhead only in
+ * the rare dtrace-active, pf-disabled case.
+ */
+int dtrace_handle_no_pf(struct pt_regs *regs)
+{
+ struct insn insn;
+
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_PF_TRAPPED);
+
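+	/*
+	 * Decode the faulting instruction and step over it, so that it is
+	 * not immediately retriggered when the fault handler returns.
+	 */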
+ kernel_insn_init(&insn, (void *)regs->ip);
+ insn_get_length(&insn);
+
+ regs->ip += insn.length;
+
+ return 1;
+}
+
int dtrace_invop_add(uint8_t (*func)(struct pt_regs *))
{
dtrace_invop_hdlr_t *hdlr;