]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
dtrace: stub-based syscall tracing
author Kris Van Hees <kris.van.hees@oracle.com>
Thu, 27 Oct 2011 14:39:19 +0000 (10:39 -0400)
committer Nick Alcock <nick.alcock@oracle.com>
Mon, 29 Jun 2015 21:39:56 +0000 (22:39 +0100)
Due to the need for specialized code handling (mainly passing in a pt_regs
structure as one of the arguments), some syscalls are called through a stub
in assembly code.

We duplicate the stub code in dtrace_stubs_x86_64.S, but instead of calling
the actual syscall implementation code, each stub calls our own
syscall-specific handler, which ensures that entry and return probes are
called as enabled and then calls the underlying implementation directly to
handle the syscall.

Also removed debugging output that is no longer relevant (code cleanup).

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
include/linux/cyclic.h
include/linux/dtrace_os.h
kernel/dtrace/Makefile
kernel/dtrace/dtrace_os.c
kernel/dtrace/dtrace_stubs_x86_64.S [new file with mode: 0644]

index 246e049e251866598feeb1ec1624be93674effe9..a5665a8fd888c5ca2cd545ef6848971a8572d4a9 100644 (file)
@@ -31,7 +31,14 @@ typedef struct cyc_time {
        ktime_t cyt_interval;
 } cyc_time_t;
 
+/*
+ * Description of an omnipresent cyclic, as passed to cyclic_add_omni().
+ *
+ * NOTE(review): nothing in this change invokes the callbacks yet; by analogy
+ * with the Solaris cyclic API they are presumably called as CPUs come online
+ * and go offline -- confirm once cyclic_add_omni() is implemented.
+ */
+typedef struct cyc_omni_handler {
+       void (*cyo_online)(void *, uint32_t, cyc_handler_t *, cyc_time_t *);
+       void (*cyo_offline)(void *, uint32_t, void *);
+       void *cyo_arg;  /* opaque argument; presumably handed to the callbacks */
+} cyc_omni_handler_t;
+
 extern cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *);
+extern cyclic_id_t cyclic_add_omni(cyc_omni_handler_t *);
 extern void cyclic_remove(cyclic_id_t);
 
 #endif /* _CYCLIC_H_ */
index 9b3268f60885ba3367d038f63fdf3bbecec4e462..d8e146fbcb1409046fd1dc7e0a5e58abf488b247 100644 (file)
@@ -9,6 +9,15 @@ typedef uint32_t dtrace_id_t;
 
 #define DTRACE_IDNONE 0
 
+#define SCE_CLONE              0
+#define SCE_FORK               1
+#define SCE_VFORK              2
+#define SCE_SIGALTSTACK                3
+#define SCE_IOPL               4
+#define SCE_EXECVE             5
+#define SCE_RT_SIGRETURN       6
+#define SCE_nr_stubs           7
+
 typedef void (*sys_call_ptr_t)(void);
 typedef long (*dt_sys_call_t)(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
                              uintptr_t, uintptr_t);
@@ -29,6 +38,7 @@ typedef struct systrace_info {
        dtrace_systrace_probe_t *probep;
        dtrace_systrace_probe_t stub;
        dt_sys_call_t           syscall;
+       dt_sys_call_t           stubs[SCE_nr_stubs];
        dtrace_syscalls_t       sysent[NR_syscalls];
 } systrace_info_t;
 
index de21ad742195fc6ae5cd92df57ea31d906df9244..1713210d969e212c737dab3e7d352ed8affe1c4d 100644 (file)
@@ -5,5 +5,6 @@
 GCOV_PROFILE := y
 
 ifdef CONFIG_DT_CORE
-obj-y                          += dtrace_os.o sdt_register.o
+obj-y                          += dtrace_os.o dtrace_stubs_x86_64.o \
+                                  sdt_register.o
 endif
index 56f9000e8a5477f80d815e1c8628548ad634480c..c97d2202bc895297043dad1a6b58f61f00fbf7f2 100644 (file)
 #include <linux/vmalloc.h>
 #include <asm/stacktrace.h>
 
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <asm/syscalls.h>
+
 /*
  * Return a high resolution timer value that is guaranteed to always increase.
  */
@@ -140,6 +144,18 @@ cyclic_id_t cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
 }
 EXPORT_SYMBOL(cyclic_add);
 
+/*
+ * Add a new omnipresent cyclic to the system.
+ *
+ * Placeholder implementation: the handler is not registered anywhere, and
+ * both the NULL-argument path and the normal path return CYCLIC_NONE, so
+ * callers currently cannot distinguish a rejected argument from "not
+ * implemented".
+ */
+cyclic_id_t cyclic_add_omni(cyc_omni_handler_t *omni)
+{
+       if (omni == NULL)
+               return CYCLIC_NONE;
+
+       return CYCLIC_NONE;
+}
+EXPORT_SYMBOL(cyclic_add_omni);
+
 /*
  * Remove a specific cyclic from the system.
  */
@@ -158,84 +174,6 @@ void cyclic_remove(cyclic_id_t id)
 }
 EXPORT_SYMBOL(cyclic_remove);
 
-void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
-                      uintptr_t, uintptr_t);
-
-void systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
-                  uintptr_t arg2, uintptr_t arg3, uintptr_t arg4,
-                  uintptr_t arg5)
-{
-}
-
-asmlinkage long systrace_syscall(uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t);
-
-static systrace_info_t systrace_info = {
-                                           &systrace_probe,
-                                           systrace_stub,
-                                           systrace_syscall,
-                                           {
-/*
- * Need to remove the define for _ASM_X86_UNISTD_64_H in order for unistd_64
- * to be included here because it was already included indirectly.
- */
-#undef __SYSCALL
-#define __SYSCALL(nr, sym)                     [nr] { __stringify(sym), },
-# undef _ASM_X86_UNISTD_64_H
-#include <asm/unistd.h>
-                                           }
-                                       };
-
-
-asmlinkage long systrace_syscall(uintptr_t arg0, uintptr_t arg1,
-                                uintptr_t arg2, uintptr_t arg3,
-                                uintptr_t arg4, uintptr_t arg5)
-{
-       long                    rc = 0;
-       unsigned long           sysnum;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-
-       asm volatile("movq %%rax,%0" : "=m"(sysnum));
-
-       sc = &systrace_info.sysent[sysnum];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
-
-       if (sc->stsy_underlying != NULL)
-               rc = (*sc->stsy_underlying)(arg0, arg1, arg2, arg3, arg4,
-                                           arg5);
-
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
-
-       return rc;
-}
-
-systrace_info_t *dtrace_syscalls_init() {
-       int                     i;
-       extern sys_call_ptr_t   sys_call_table[NR_syscalls];
-
-       systrace_info.probep = &systrace_probe;
-       systrace_info.stub = systrace_stub;
-       systrace_info.syscall = systrace_syscall;
-
-       for (i = 0; i < NR_syscalls; i++) {
-               systrace_info.sysent[i].stsy_tblent = &sys_call_table[i];
-               systrace_info.sysent[i].stsy_underlying =
-                                       (dt_sys_call_t)sys_call_table[i];
-       }
-
-       return &systrace_info;
-}
-EXPORT_SYMBOL(dtrace_syscalls_init);
-
 static int dtrace_stacktrace_stack(void *data, char *name)
 {
        stacktrace_state_t      *st = (stacktrace_state_t *)data;
@@ -341,3 +279,317 @@ void dtrace_stacktrace(stacktrace_state_t *st)
                                   : &dtrace_stacktrace_ops, st);
 }
 EXPORT_SYMBOL(dtrace_stacktrace);
+
+/*
+ * Probe function called by the syscall handlers in this file whenever an
+ * entry or return probe id is set for a syscall.  Its address is published
+ * through systrace_info.probep; nothing in this file assigns it, so it is
+ * presumably filled in by the consumer of dtrace_syscalls_init() -- confirm.
+ */
+void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+                      uintptr_t, uintptr_t);
+
+/*
+ * No-op with the same signature as a probe function.  Published through
+ * systrace_info.stub.
+ */
+void systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
+                  uintptr_t arg2, uintptr_t arg3, uintptr_t arg4,
+                  uintptr_t arg5)
+{
+}
+
+/*
+ * Replacement syscall entry points.  systrace_syscall() is the generic C
+ * handler defined in this file; the dtrace_stub_*() symbols are the assembly
+ * stubs from dtrace_stubs_x86_64.S.  All of them are declared with the
+ * six-argument dt_sys_call_t shape so that they can be stored in
+ * systrace_info.syscall and systrace_info.stubs[].
+ */
+asmlinkage long systrace_syscall(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_clone(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_fork(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_vfork(uintptr_t, uintptr_t,
+                                 uintptr_t, uintptr_t,
+                                 uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_sigaltstack(uintptr_t, uintptr_t,
+                                       uintptr_t, uintptr_t,
+                                       uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_iopl(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_execve(uintptr_t, uintptr_t,
+                                  uintptr_t, uintptr_t,
+                                  uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_rt_sigreturn(uintptr_t, uintptr_t,
+                                        uintptr_t, uintptr_t,
+                                        uintptr_t, uintptr_t);
+
+static systrace_info_t systrace_info =
+               {
+                       &systrace_probe,
+                       systrace_stub,
+                       systrace_syscall,
+                       {
+                           [SCE_CLONE] dtrace_stub_clone,
+                           [SCE_FORK] dtrace_stub_fork,
+                           [SCE_VFORK] dtrace_stub_vfork,
+                           [SCE_SIGALTSTACK] dtrace_stub_sigaltstack,
+                           [SCE_IOPL] dtrace_stub_iopl,
+                           [SCE_EXECVE] dtrace_stub_execve,
+                           [SCE_RT_SIGRETURN] dtrace_stub_rt_sigreturn,
+                       },
+                       {
+/*
+ * Need to remove the define for _ASM_X86_UNISTD_64_H in order for unistd_64
+ * to be included here because it was already included indirectly.
+ */
+#undef __SYSCALL
+#define __SYSCALL(nr, sym)     [nr] { __stringify(sym), },
+# undef _ASM_X86_UNISTD_64_H
+#include <asm/unistd.h>
+                       }
+               };
+
+
+/*
+ * Generic replacement syscall handler.
+ *
+ * Fires the entry probe (if enabled) with the six raw syscall arguments,
+ * calls the underlying syscall implementation recorded in sysent[], and then
+ * fires the return probe (if enabled).  The return probe receives the return
+ * value in arg0 and arg1 and its upper 32 bits in arg2.
+ *
+ * The syscall number is taken from %rax, so this function assumes that %rax
+ * still holds the syscall number when the asm statement executes; keep that
+ * statement first in the body.
+ *
+ * Returns the underlying syscall's return value, 0 if no underlying handler
+ * is recorded, or -ENOSYS if the value read from %rax is not a valid index
+ * into sysent[].
+ */
+asmlinkage long systrace_syscall(uintptr_t arg0, uintptr_t arg1,
+                                uintptr_t arg2, uintptr_t arg3,
+                                uintptr_t arg4, uintptr_t arg5)
+{
+       long                    rc = 0;
+       unsigned long           sysnum;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       asm volatile("movq %%rax,%0" : "=m"(sysnum));
+
+       /*
+        * Never index sysent[] (and later call through it) with a value
+        * outside the table.
+        */
+       if (sysnum >= NR_syscalls)
+               return -ENOSYS;
+
+       sc = &systrace_info.sysent[sysnum];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       if (sc->stsy_underlying != NULL)
+               rc = (*sc->stsy_underlying)(arg0, arg1, arg2, arg3, arg4,
+                                           arg5);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+/*
+ * Record, for every syscall number, the address of its sys_call_table slot
+ * (stsy_tblent) and the handler currently stored in that slot
+ * (stsy_underlying), then hand out the syscall tracing state.
+ *
+ * Returns a pointer to the file-static systrace_info structure.
+ */
+systrace_info_t *dtrace_syscalls_init(void)
+{
+       int                     i;
+       extern sys_call_ptr_t   sys_call_table[NR_syscalls];
+
+       for (i = 0; i < NR_syscalls; i++) {
+               systrace_info.sysent[i].stsy_tblent = &sys_call_table[i];
+               systrace_info.sysent[i].stsy_underlying =
+                                       (dt_sys_call_t)sys_call_table[i];
+       }
+
+       return &systrace_info;
+}
+EXPORT_SYMBOL(dtrace_syscalls_init);
+
+/*
+ * C handler behind the dtrace_stub_clone assembly stub, which supplies the
+ * pt_regs pointer as the fifth argument.
+ *
+ * Fires the clone entry probe (if enabled), performs the fork via do_fork(),
+ * then fires the return probe (if enabled) with the result in arg0 and arg1
+ * and its upper 32 bits in arg2.  Returns do_fork()'s result.
+ */
+long dtrace_clone(unsigned long clone_flags, unsigned long newsp,
+                 void __user *parent_tid, void __user *child_tid,
+                 struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       /*
+        * A zero newsp means "use the caller's stack pointer".  This is done
+        * before the entry probe, so the probe sees the substituted value.
+        */
+       if (!newsp)
+               newsp = regs->sp;
+
+       sc = &systrace_info.sysent[__NR_clone];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, clone_flags, newsp,
+                                 (uintptr_t)parent_tid, (uintptr_t)child_tid,
+                                 (uintptr_t)regs, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+/*
+ * C handler behind the dtrace_stub_fork assembly stub.
+ *
+ * Fires the fork entry probe (if enabled) with the pt_regs pointer as arg0,
+ * forks via do_fork(SIGCHLD, ...) on the caller's current stack pointer, then
+ * fires the return probe (if enabled) with the result in arg0 and arg1 and
+ * its upper 32 bits in arg2.  Returns do_fork()'s result.
+ */
+long dtrace_fork(struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_fork];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)regs, 0, 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+/*
+ * C handler behind the dtrace_stub_vfork assembly stub.
+ *
+ * Same structure as dtrace_fork(), but the child is created with
+ * CLONE_VFORK | CLONE_VM | SIGCHLD.  The return probe (if enabled) receives
+ * the result in arg0 and arg1 and its upper 32 bits in arg2.  Returns
+ * do_fork()'s result.
+ */
+long dtrace_vfork(struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_vfork];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)regs, 0, 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
+                    NULL, NULL);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+/*
+ * C handler behind the dtrace_stub_execve assembly stub, which supplies the
+ * pt_regs pointer as the fourth argument.
+ *
+ * Fires the execve entry probe (if enabled) with the raw user pointers,
+ * copies the file name in with getname(), runs do_execve() and releases the
+ * name again.  The return probe (if enabled) fires on every path, including
+ * a getname() failure, with the result in arg0 and arg1 and its upper 32
+ * bits in arg2.
+ *
+ * Returns do_execve()'s result, or the error encoded in getname()'s return
+ * value.
+ */
+long dtrace_execve(const char __user *name,
+                  const char __user *const __user *argv,
+                  const char __user *const __user *envp, struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+       char                    *filename;
+
+       sc = &systrace_info.sysent[__NR_execve];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)name, (uintptr_t)argv,
+                                 (uintptr_t)envp, (uintptr_t)regs, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       filename = getname(name);
+       /* rc is only meaningful here if filename is an error pointer. */
+       rc = PTR_ERR(filename);
+       if (IS_ERR(filename))
+               goto out;
+
+       rc = do_execve(filename, argv, envp, regs);
+
+       putname(filename);
+
+out:
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+/*
+ * C handler behind the dtrace_stub_sigaltstack assembly stub, which supplies
+ * the pt_regs pointer as the third argument.
+ *
+ * Fires the sigaltstack entry probe (if enabled), calls do_sigaltstack() with
+ * the caller's stack pointer taken from regs, then fires the return probe (if
+ * enabled) with the result in arg0 and arg1 and its upper 32 bits in arg2.
+ * Returns do_sigaltstack()'s result.
+ */
+long dtrace_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+                       struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_sigaltstack];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)uss, (uintptr_t)uoss,
+                                 (uintptr_t)regs, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+        rc =  do_sigaltstack(uss, uoss, regs->sp);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+/*
+ * C handler behind the dtrace_stub_iopl assembly stub, which supplies the
+ * pt_regs pointer as the second argument.
+ *
+ * Fires the iopl entry probe (if enabled), then changes the I/O privilege
+ * level inline rather than calling another function: the new level is
+ * written to bits 12-13 of regs->flags and to the current thread's iopl
+ * field, and set_iopl_mask() is called with it.  The return probe (if
+ * enabled) fires on success and on both error paths, with the result in arg0
+ * and arg1 and its upper 32 bits in arg2.
+ *
+ * Returns 0 on success, -EINVAL if level > 3, or -EPERM if the level is
+ * being raised without CAP_SYS_RAWIO.
+ */
+long dtrace_iopl(unsigned int level, struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+       unsigned int            old = (regs->flags >> 12) & 3; /* current level */
+       struct thread_struct    *t = &current->thread;
+
+       sc = &systrace_info.sysent[__NR_iopl];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)level, (uintptr_t)regs,
+                                 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       if (level > 3) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       /* Trying to gain more privileges? */
+       if (level > old) {
+               if (!capable(CAP_SYS_RAWIO)) {
+                       rc = -EPERM;
+                       goto out;
+               }
+       }
+
+       regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
+       t->iopl = level << 12;
+       set_iopl_mask(t->iopl);
+
+out:
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+/*
+ * C handler behind the dtrace_stub_rt_sigreturn assembly stub.
+ *
+ * Fires the rt_sigreturn entry probe (if enabled) with the pt_regs pointer as
+ * arg0, calls sys_rt_sigreturn(), then fires the return probe (if enabled)
+ * with the result in arg0 and arg1 and its upper 32 bits in arg2.  Returns
+ * sys_rt_sigreturn()'s result.
+ */
+long dtrace_rt_sigreturn(struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_rt_sigreturn];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)regs, 0, 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = sys_rt_sigreturn(regs);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
diff --git a/kernel/dtrace/dtrace_stubs_x86_64.S b/kernel/dtrace/dtrace_stubs_x86_64.S
new file mode 100644 (file)
index 0000000..dde72e3
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ * FILE:        dtrace_stubs_x86_64.S
+ * DESCRIPTION: Dynamic Tracing: x86_64 specific stubs (based on entry_64.S)
+ *
+ * Copyright (C) 2010, 2011 Oracle Corporation
+ */
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/asm-offsets.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/page_types.h>
+#include <asm/irqflags.h>
+#include <asm/paravirt.h>
+#include <asm/ftrace.h>
+#include <asm/percpu.h>
+
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_X86_64      (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_64BIT 0x80000000
+#define __AUDIT_ARCH_LE           0x40000000
+
+       .code64
+       .section .entry.text, "ax"
+
+/*
+ * C code is not supposed to know about undefined top of stack. Every time
+ * a C function with a pt_regs argument is called from the SYSCALL based
+ * fast path FIXUP_TOP_OF_STACK is needed.
+ * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
+ * manipulation.
+ */
+
+/*
+ * FIXUP_TOP_OF_STACK tmp, offset=0
+ *   tmp    - scratch register, clobbered
+ *   offset - extra displacement added to every frame slot
+ *
+ * Fills the RSP slot from the per-CPU old_rsp, the SS and CS slots with the
+ * user segment selectors, the RCX slot with -1, and the EFLAGS slot from the
+ * R11 slot.
+ */
+       /* %rsp:at FRAMEEND */
+       .macro FIXUP_TOP_OF_STACK tmp offset=0
+       movq PER_CPU_VAR(old_rsp),\tmp
+       movq \tmp,RSP+\offset(%rsp)
+       movq $__USER_DS,SS+\offset(%rsp)
+       movq $__USER_CS,CS+\offset(%rsp)
+       movq $-1,RCX+\offset(%rsp)
+       movq R11+\offset(%rsp),\tmp  /* get eflags */
+       movq \tmp,EFLAGS+\offset(%rsp)
+       .endm
+
+/*
+ * RESTORE_TOP_OF_STACK tmp, offset=0
+ *   tmp    - scratch register, clobbered
+ *   offset - extra displacement added to every frame slot
+ *
+ * Copies the frame's RSP slot back to the per-CPU old_rsp and the EFLAGS
+ * slot back to the R11 slot.
+ */
+       .macro RESTORE_TOP_OF_STACK tmp offset=0
+       movq RSP+\offset(%rsp),\tmp
+       movq \tmp,PER_CPU_VAR(old_rsp)
+       movq EFLAGS+\offset(%rsp),\tmp
+       movq \tmp,R11+\offset(%rsp)
+       .endm
+
+/*
+ * FAKE_STACK_FRAME child_rip
+ *
+ * Pushes six quadwords: kernel ss, a zero rsp, eflags with only IF set,
+ * kernel cs, \child_rip as rip, and a zero orig rax.  Zeroes %eax (and thus
+ * %rax) as a side effect.  Undone by UNFAKE_STACK_FRAME.
+ */
+       .macro FAKE_STACK_FRAME child_rip
+       /* push in order ss, rsp, eflags, cs, rip */
+       xorl %eax, %eax
+       pushq_cfi $__KERNEL_DS /* ss */
+       /*CFI_REL_OFFSET        ss,0*/
+       pushq_cfi %rax /* rsp */
+       CFI_REL_OFFSET  rsp,0
+       pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+       /*CFI_REL_OFFSET        rflags,0*/
+       pushq_cfi $__KERNEL_CS /* cs */
+       /*CFI_REL_OFFSET        cs,0*/
+       pushq_cfi \child_rip /* rip */
+       CFI_REL_OFFSET  rip,0
+       pushq_cfi %rax /* orig rax */
+       .endm
+
+/*
+ * Drop the six quadwords pushed by FAKE_STACK_FRAME and adjust the CFA
+ * offset to match.
+ */
+       .macro UNFAKE_STACK_FRAME
+       addq $8*6, %rsp
+       CFI_ADJUST_CFA_OFFSET   -(6*8)
+       .endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ *
+ * EMPTY_FRAME start=1, offset=0
+ *   start  - non-zero: open a new CFI procedure (simple, signal frame) and
+ *            define the CFA as rsp + 8 + offset;
+ *            zero: only redefine the CFA offset as 8 + offset
+ *   offset - bytes between %rsp and the return-address slot
+ */
+       .macro EMPTY_FRAME start=1 offset=0
+       .if \start
+       CFI_STARTPROC simple
+       CFI_SIGNAL_FRAME
+       CFI_DEF_CFA rsp,8+\offset
+       .else
+       CFI_DEF_CFA_OFFSET 8+\offset
+       .endif
+       .endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ *
+ * Builds on EMPTY_FRAME and additionally records where rsp and rip are saved
+ * relative to the RIP slot; the ss, rflags and cs annotations are left
+ * commented out.
+ */
+       .macro INTR_FRAME start=1 offset=0
+       EMPTY_FRAME \start, SS+8+\offset-RIP
+       /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
+       CFI_REL_OFFSET rsp, RSP+\offset-RIP
+       /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
+       /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
+       CFI_REL_OFFSET rip, RIP+\offset-RIP
+       .endm
+
+/*
+ * initial frame state for exceptions with error code (and interrupts
+ * with vector already pushed)
+ *
+ * Same as INTR_FRAME with the offset shifted by the distance between the
+ * ORIG_RAX and RIP slots.
+ */
+       .macro XCPT_FRAME start=1 offset=0
+       INTR_FRAME \start, RIP+\offset-ORIG_RAX
+       /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
+       .endm
+
+/*
+ * frame that enables calling into C.
+ *
+ * Builds on XCPT_FRAME and adds CFI save locations for rdi, rsi, rdx, rcx,
+ * rax and r8-r11, all expressed relative to ARGOFFSET.
+ */
+       .macro PARTIAL_FRAME start=1 offset=0
+       XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
+       CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
+       CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
+       CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
+       CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
+       CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
+       CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
+       CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
+       CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
+       CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+       .endm
+
+/*
+ * frame that enables passing a complete pt_regs to a C function.
+ *
+ * Builds on PARTIAL_FRAME and adds CFI save locations for rbx, rbp and
+ * r12-r15.
+ */
+       .macro DEFAULT_FRAME start=1 offset=0
+       PARTIAL_FRAME \start, R11+\offset-R15
+       CFI_REL_OFFSET rbx, RBX+\offset
+       CFI_REL_OFFSET rbp, RBP+\offset
+       CFI_REL_OFFSET r12, R12+\offset
+       CFI_REL_OFFSET r13, R13+\offset
+       CFI_REL_OFFSET r14, R14+\offset
+       CFI_REL_OFFSET r15, R15+\offset
+       .endm
+
+/*
+ * Certain special system calls that need to save a complete full stack frame.
+ *
+ * PTREGSCALL label, func, arg
+ *   label - name of the stub entry point to define
+ *   func  - C handler to call
+ *   arg   - register that receives the pt_regs pointer, i.e. the argument
+ *           register matching the position of func's struct pt_regs *
+ *           parameter
+ *
+ * The stub reserves REST_SKIP bytes, calls save_rest, loads the pt_regs
+ * pointer (8(%rsp), just above the return address) into \arg, calls \func
+ * and finishes through ptregscall_common.  save_rest and ptregscall_common
+ * are not defined in this file.
+ */
+       .macro PTREGSCALL label,func,arg
+ENTRY(\label)
+       PARTIAL_FRAME 1 8               /* offset 8: return address */
+       subq $REST_SKIP, %rsp
+       CFI_ADJUST_CFA_OFFSET REST_SKIP
+       call save_rest
+       DEFAULT_FRAME 0 8               /* offset 8: return address */
+       leaq 8(%rsp), \arg      /* pt_regs pointer */
+       call \func
+       jmp ptregscall_common
+       CFI_ENDPROC
+END(\label)
+       .endm
+
+       /* pt_regs is the 5th, 1st, 1st, 3rd and 2nd argument respectively. */
+       PTREGSCALL dtrace_stub_clone, dtrace_clone, %r8
+       PTREGSCALL dtrace_stub_fork, dtrace_fork, %rdi
+       PTREGSCALL dtrace_stub_vfork, dtrace_vfork, %rdi
+       PTREGSCALL dtrace_stub_sigaltstack, dtrace_sigaltstack, %rdx
+       PTREGSCALL dtrace_stub_iopl, dtrace_iopl, %rsi
+
+/*
+ * execve stub: saves the remaining registers, fixes up the top of stack and
+ * calls dtrace_execve() with the frame address (%rsp) in %rcx, the fourth
+ * argument register, as its pt_regs pointer.  The return value is stored in
+ * the frame's RAX slot and the stub leaves through int_ret_from_sys_call
+ * (not defined in this file).
+ */
+ENTRY(dtrace_stub_execve)
+       CFI_STARTPROC
+       addq $8, %rsp           /* discard 8 bytes; presumably the return address */
+       PARTIAL_FRAME 0
+       SAVE_REST
+       FIXUP_TOP_OF_STACK %r11
+       movq %rsp, %rcx
+       call dtrace_execve
+       RESTORE_TOP_OF_STACK %r11
+       movq %rax,RAX(%rsp)
+       RESTORE_REST
+       jmp int_ret_from_sys_call
+       CFI_ENDPROC
+END(dtrace_stub_execve)
+
+/*
+ * sigreturn is special because it needs to restore all registers on return.
+ * This cannot be done with SYSRET, so use the IRET return path instead.
+ *
+ * Calls dtrace_rt_sigreturn() with the frame address (%rsp) in %rdi as its
+ * pt_regs pointer, stores the return value in the frame's RAX slot and
+ * leaves through int_ret_from_sys_call (not defined in this file).  Unlike
+ * dtrace_stub_execve there is no RESTORE_TOP_OF_STACK after the call.
+ */
+ENTRY(dtrace_stub_rt_sigreturn)
+       CFI_STARTPROC
+       addq $8, %rsp           /* discard 8 bytes; presumably the return address */
+       PARTIAL_FRAME 0
+       SAVE_REST
+       movq %rsp,%rdi
+       FIXUP_TOP_OF_STACK %r11
+       call dtrace_rt_sigreturn
+       movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
+       RESTORE_REST
+       jmp int_ret_from_sys_call
+       CFI_ENDPROC
+END(dtrace_stub_rt_sigreturn)