This commit continues the implementation of Function Boundary Tracing
(FBT) and fixes various problems with the original implementation and
other things in DTrace that it caused to break. It is done as a single
commit due to the intertwined nature of the code it touches.
1. The sparc64 fast path implementation (dtrace_caller) for the D 'caller'
variable was trampling the %g4 register which Linux uses to hold the
'current' task pointer. By passing in a dummy argument, we ensure
that we can use the %i1 register to temporarily store %g4.
2. For consistency, we are now using stacktrace_state_t instead of
struct stacktrace_state.
3. We now call dtrace_stacktrace() under NOFAULT protection.
4. The ustack stack walker has been rewritten (in the kernel), so the
previous implementation has been removed.
5. We no longer process probes when the kernel panics, to avoid DTrace
disrupting output that could be crucial to debugging.
6. We now ensure that re-entry of dtrace_probe() can no longer happen,
except for the ERROR probe (which is a re-entry by design).
7. Since FBT now works, the restriction to only support SyS_* functions
has been removed.
Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com>
Reviewed-by: Nick Alcock <nick.alcock@oracle.com>
Orabug:
21220305
Orabug:
24829326
* CDDL HEADER END
*/
/*
- * Copyright 2010-2014 Oracle, Inc. All rights reserved.
+ * Copyright 2010-2017 Oracle, Inc. All rights reserved.
* Use is subject to license terms.
*/
mov %o2, %o0
ENDPROC(dtrace_casptr)
+/* FIXME */
ENTRY(dtrace_fish)
rd %pc, %g5
ba 0f
add %g2, 1, %o0 ! Failure; return deepest frame + 1
ENDPROC(dtrace_fish)
-/* FIXME */
+ /*
+ * Try to find caller in register windows.
+ *
+ * This is tricky as we must use globals during window rotation
+ * and must preserve them at the same time. For that reason the 2nd
+ * argument is not used. This allows us to use %o1 as a scratch register
+ * to keep the value of %g4 in, and prevents the caller from keeping
+ * anything in %o1.
+ */
ENTRY(dtrace_caller)
+ mov %g4, %o1
mov nwin_minus_one, %g4
rdpr %canrestore, %g2
cmp %g2, %o0
sub %g1, %o0, %g3
brgez,a,pt %g3, 0f
wrpr %g3, %cwp
- !
- ! CWP minus the number of frames is negative; we must perform the
- ! arithmetic modulo MAXWIN.
- !
+ /*
+ * CWP minus the number of frames is negative; we must perform the
+ * arithmetic modulo MAXWIN.
+ */
add %g4, %g3, %g3
inc %g3
wrpr %g3, %cwp
0:
mov %i7, %g4
wrpr %g1, %cwp
- retl
mov %g4, %o0
+ retl
+ mov %o1, %g4
1:
- !
- ! The caller has been flushed to the stack. This is unlikely
- ! (interrupts are disabled in dtrace_probe()), but possible (the
- ! interrupt inducing the spill may have been taken before the
- ! call to dtrace_probe()).
- !
+ /*
+ * The caller has been flushed to the stack. This is unlikely
+ * (interrupts are disabled in dtrace_probe()), but possible (the
+ * interrupt inducing the spill may have been taken before the
+ * call to dtrace_probe()).
+ */
+ mov %o1, %g4
retl
mov -1, %o0
ENDPROC(dtrace_caller)
*
* CDDL HEADER END
*
- * Copyright 2010-2014 Oracle, Inc. All rights reserved.
+ * Copyright 2010-2017 Oracle, Inc. All rights reserved.
* Use is subject to license terms.
*/
mstate->dtms_arg[0]);
mstate->dtms_caller = caller[1];
} else if ((mstate->dtms_caller =
- dtrace_caller(aframes)) == -1) {
+ dtrace_caller(aframes, 0)) == -1) {
/*
* We have failed to do this the quick way;
* we must resort to the slower approach of
*
* CDDL HEADER END
*
- * Copyright 2010 -- 2016 Oracle, Inc. All rights reserved.
+ * Copyright 2010-2017 Oracle, Inc. All rights reserved.
* Use is subject to license terms.
*/
void dtrace_getpcstack(uint64_t *pcstack, int pcstack_limit, int aframes,
uint32_t *intrpc)
{
- struct stacktrace_state st = {
+ stacktrace_state_t st = {
pcstack,
NULL,
pcstack_limit,
STACKTRACE_KERNEL
};
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_stacktrace(&st);
+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
while (st.depth < st.limit)
pcstack[st.depth++] = 0;
}
EXPORT_SYMBOL(dtrace_getpcstack);
-static struct vm_area_struct *find_user_vma(struct task_struct *tsk,
- struct mm_struct *mm,
- struct page **page,
- unsigned long addr,
- int need_incore)
-{
- struct vm_area_struct *vma = NULL;
- int nonblocking = 1;
- int flags = FOLL_IMMED;
- int ret;
-
- if (page)
- flags |= FOLL_GET;
-
- ret = __get_user_pages(tsk, mm, addr, 1, flags, page, &vma,
- &nonblocking);
-
- if ((nonblocking == 0) && need_incore) {
- if ((ret > 0) && page) {
- size_t i;
- for (i = 0; i < ret; i++)
- put_page(page[i]);
- }
- return NULL;
- }
- else if (ret <= 0)
- return NULL;
- else
- return vma;
-}
-
/*
* Get user stack entries up to the pcstack_limit; return the number of entries
* acquired. If pcstack is NULL, return the number of entries potentially
int pcstack_limit)
{
struct task_struct *p = current;
- struct mm_struct *mm = p->mm;
- unsigned long tos, bos, fpc;
- unsigned long *sp;
- unsigned long depth = 0;
- struct vm_area_struct *stack_vma;
- struct page *stack_page = NULL;
- struct pt_regs *regs = current_pt_regs();
+ stacktrace_state_t st;
+ unsigned long depth;
if (pcstack) {
if (unlikely(pcstack_limit < 2)) {
pcstack_limit -= 2;
}
- if (!user_mode(regs))
- goto out;
-
- /*
- * There is always at least one address to report: the instruction
- * pointer itself (frame 0).
- */
- depth++;
-
- fpc = instruction_pointer(regs);
- if (pcstack) {
- *pcstack++ = (uint64_t)fpc;
- pcstack_limit--;
- }
+ st.pcs = pcstack;
+ st.fps = fpstack;
+ st.limit = pcstack_limit;
+ st.depth = 0;
+ st.flags = STACKTRACE_USER;
- /*
- * We cannot ustack() if this task has no mm, if this task is a kernel
- * thread, or when someone else has the mmap_sem or the page_table_lock
- * (because find_user_vma() ultimately does a __get_user_pages() and
- * thence a follow_page(), which can take that lock).
- */
- if (mm == NULL || (p->flags & PF_KTHREAD) ||
- spin_is_locked(&mm->page_table_lock))
- goto out;
-
- if (!down_read_trylock(&mm->mmap_sem))
- goto out;
- atomic_inc(&mm->mm_users);
-
-#ifdef CONFIG_X86_64
- tos = current_user_stack_pointer();
-#elif defined(STACK_BIAS)
- tos = user_stack_pointer(current_pt_regs()) + STACK_BIAS;
-#else
-#error Not x86-64 nor a stack-biased platform, porting needed
-#endif
- stack_vma = find_user_vma(p, mm, NULL, (unsigned long) tos, 0);
- if (!stack_vma ||
- stack_vma->vm_start > (unsigned long) tos)
- goto unlock_out;
-
-#ifdef CONFIG_STACK_GROWSUP
-#error This code does not yet work on STACK_GROWSUP platforms.
-#endif
- bos = stack_vma->vm_end;
- if (stack_guard_page_end(stack_vma, bos))
- bos -= PAGE_SIZE;
-
- /*
- * If we have a pcstack, loop as long as we are within the stack limit.
- * Otherwise, loop until we run out of stack.
- */
- for (sp = (unsigned long *)tos;
- sp <= ((unsigned long *)bos - sizeof(unsigned long)) &&
- ((pcstack && pcstack_limit > 0) ||
- !pcstack);
- sp++) {
- struct vm_area_struct *code_vma;
- unsigned long addr;
- int copyret;
-
- /*
- * Recheck for faultedness and pin at page boundaries.
- */
- if (!stack_page || (((unsigned long)sp & PAGE_MASK) == 0)) {
- if (stack_page) {
- put_page(stack_page);
- stack_page = NULL;
- }
-
- if (!find_user_vma(p, mm, &stack_page,
- (unsigned long) sp, 1))
- break;
- }
+ dtrace_stacktrace(&st);
- pagefault_disable();
- copyret = copy_from_user(&addr, sp, sizeof(addr));
- pagefault_enable();
- if (copyret)
- break;
-
- if (addr == fpc)
- continue;
-
- code_vma = find_user_vma(p, mm, NULL, addr, 0);
-
- if (!code_vma || code_vma->vm_start > addr)
- continue;
-
- if ((addr >= tos && addr <= bos) ||
- (code_vma->vm_flags & VM_GROWSDOWN)) {
- /* stack address - may need it for the fpstack. */
- } else if (code_vma->vm_flags & VM_EXEC) {
- if (pcstack) {
- *pcstack++ = addr;
- pcstack_limit--;
- }
- depth++;
- }
+ depth = st.depth;
+ while (st.depth < st.limit) {
+ if (pcstack)
+ pcstack[st.depth++] = 0;
+ if (fpstack)
+ fpstack[st.depth++] = 0;
}
- if (stack_page != NULL)
- put_page(stack_page);
-
-unlock_out:
- atomic_dec(&mm->mm_users);
- up_read(&mm->mmap_sem);
-
-out:
- if (pcstack)
- while (pcstack_limit--)
- *pcstack++ = 0;
return depth;
}
{
uintptr_t old = mstate->dtms_scratch_ptr;
size_t size;
- struct stacktrace_state st = {
+ stacktrace_state_t st = {
NULL,
NULL,
0,
*
* CDDL HEADER END
*
- * Copyright 2010-2014 Oracle, Inc. All rights reserved.
+ * Copyright 2010-2017 Oracle, Inc. All rights reserved.
* Use is subject to license terms.
*/
return;
}
-#ifdef FIXME
- if (panic_quiesce) {
+ if (oops_in_progress) {
/*
* We don't trace anything if we're panicking.
*/
local_irq_restore(cookie);
return;
}
-#endif
+
+ flags = (volatile uint16_t *)&this_cpu_core->cpuc_dtrace_flags;
+
+ /*
+ * Probe context is not re-entrant, unless we're getting called to
+ * process an ERROR probe.
+ */
+ if ((*flags & CPU_DTRACE_PROBE_CTX) && id != dtrace_probeid_error) {
+ dt_dbg_probe("Attempt to fire probe from within a probe " \
+ "(ID %d, CPoID %d, U %d, pflag %d)\n", id,
+ (int)this_cpu_core->cpu_dtrace_caller, cpuid,
+ pflag);
+ if (pflag)
+ dtrace_preempt_on();
+ local_irq_restore(cookie);
+ return;
+ }
+
+ *flags |= CPU_DTRACE_PROBE_CTX;
+ this_cpu_core->cpu_dtrace_caller = id;
now = dtrace_gethrtime();
vtime = (dtrace_vtime_references > 0);
mstate.dtms_arg[3] = arg3;
mstate.dtms_arg[4] = arg4;
- flags = (volatile uint16_t *)&this_cpu_core->cpuc_dtrace_flags;
-
for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
dtrace_predicate_t *pred = ecb->dte_predicate;
dtrace_state_t *state = ecb->dte_state;
if (vtime)
current->dtrace_start = dtrace_gethrtime();
+ /*
+ * Only clear the flag if this is not the ERROR probe. We know that
+ * an ERROR probe executes from within another probe, and therefore
+ * we need to retain the probe context flag in the flags.
+ */
+ if (id != dtrace_probeid_error) {
+ *flags &= ~CPU_DTRACE_PROBE_CTX;
+ this_cpu_core->cpu_dtrace_caller = 0;
+ }
+
if (pflag)
dtrace_preempt_on();
local_irq_restore(cookie);
*
* CDDL HEADER END
*
- * Copyright 2010, 2011, 2012, 2013 Oracle, Inc. All rights reserved.
+ * Copyright 2010-2017 Oracle, Inc. All rights reserved.
* Use is subject to license terms.
*/
fbt_probe_t *fbp;
fbt_probe_t *prev;
- /*
- * Only syscalls for now...
- */
- if (strncmp(func, "SyS_", 4))
- return NULL;
-
switch (type) {
case FBT_ENTRY:
fbp = kzalloc(sizeof(fbt_probe_t), GFP_KERNEL);
*
* CDDL HEADER END
*
- * Copyright 2009-2014 Oracle, Inc. All rights reserved.
+ * Copyright 2009-2017 Oracle, Inc. All rights reserved.
* Use is subject to license terms.
*/
volatile uint16_t *);
extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t,
volatile uint16_t *);
-extern uintptr_t dtrace_caller(int);
+
+/*
+ * Platforms that support a fast path to obtain the caller implement the
+ * dtrace_caller() function.
+ *
+ * The first argument is the number of frames that should be skipped when
+ * looking for a caller address. The 2nd argument is a dummy argument that
+ * is necessary for SPARC.
+ *
+ * On x86 this is effectively a NOP.
+ *
+ * On SPARC it is possible to retrieve the caller address from the register
+ * windows without flushing them to the stack. This involves performing
+ * explicit rotation of the register windows. Modification of the windowing
+ * mechanism state alters all %i, %o, and %l registers so we can only use
+ * %g registers to store temporary data.
+ *
+ * On Linux a lot of %g registers are already allocated for specific purposes.
+ * Saving temporaries to the stack would be a violation of the fast path code
+ * logic. Therefore, the function prototype declares a 2nd argument that serves
+ * as a temporary value. A compiler will not expect that the value in %o1
+ * will survive the call and therefore dtrace_caller() can use %o1 as a
+ * temporary register.
+ */
+extern uintptr_t dtrace_caller(int, int);
extern void dtrace_copyin_arch(uintptr_t, uintptr_t, size_t,
volatile uint16_t *);