This patch implements dynamic ftrace for PA-RISC. The required mcount
call sequences can get pretty long, so instead of patching the
whole call sequence out of the functions, we are using
-fpatchable-function-entry from gcc. This puts a configurable number of
NOPs before/at the start of the function. Taking do_sys_open() as an
example, it looks like this when the call is patched out:
1036b248:       08 00 02 40     nop
1036b24c:       08 00 02 40     nop
1036b250:       08 00 02 40     nop
1036b254:       08 00 02 40     nop
1036b258 <do_sys_open>:
1036b258:       08 00 02 40     nop
1036b25c:       08 03 02 41     copy r3,r1
1036b260:       6b c2 3f d9     stw rp,-14(sp)
1036b264:       08 1e 02 43     copy sp,r3
1036b268:       6f c1 01 00     stw,ma r1,80(sp)
When ftrace gets enabled for this function the kernel will patch these
NOPs to:
1036b248:       10 19 57 20     <address of ftrace>
1036b24c:       6f c1 00 80     stw,ma r1,40(sp)
1036b250:       48 21 3f d1     ldw -18(r1),r1
1036b254:       e8 20 c0 02     bv,n r0(r1)
1036b258 <do_sys_open>:
1036b258:       e8 3f 1f df     b,l,n .-c,r1
1036b25c:       08 03 02 41     copy r3,r1
1036b260:       6b c2 3f d9     stw rp,-14(sp)
1036b264:       08 1e 02 43     copy sp,r3
1036b268:       6f c1 01 00     stw,ma r1,80(sp)
So the first NOP in do_sys_open() will be patched to jump backwards into
some minimal trampoline code which pushes a stack frame, saves r1 (which
holds the return address), loads the address of the real ftrace function,
and branches to that location. For 64-bit, things get a bit more
complicated (and longer) because we must make sure that the location
holding the ftrace address is 8-byte aligned, and that the offset passed
to ldd for fetching the address is 8-byte aligned as well.
Note that gcc has a bug which misplaces the function label; gcc needs to
be patched for dynamic ftrace to work. See
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90751 for details.
Signed-off-by: Sven Schnelle <svens@stackframe.org>
Signed-off-by: Helge Deller <deller@gmx.de>
        select HAVE_ARCH_KGDB
        select HAVE_KPROBES
        select HAVE_KRETPROBES
+       select HAVE_DYNAMIC_FTRACE if $(cc-option,-fpatchable-function-entry=1,1)
+       select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE
 
        help
          The PA-RISC microprocessor is designed by Hewlett-Packard and used
 
        endif
 endif
 
+ifdef CONFIG_DYNAMIC_FTRACE
+# Number of NOP instructions reserved per function for the ftrace
+# trampoline; the 64-bit configuration reserves more than the 32-bit one.
+ifdef CONFIG_64BIT
+NOP_COUNT := 8
+else
+NOP_COUNT := 5
+endif
+
+export CC_USING_RECORD_MCOUNT:=1
+export CC_USING_PATCHABLE_FUNCTION_ENTRY:=1
+
+# Make the choice visible to assembly and C code; C code additionally
+# gets the NOP count as FTRACE_PATCHABLE_FUNCTION_SIZE.
+KBUILD_AFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1
+KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \
+                -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT)
+
+# -fpatchable-function-entry=N,M: N NOPs in total, M of them placed before
+# the function label. M is NOP_COUNT-1 here, so exactly one NOP sits at the
+# function entry itself.
+CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1)))
+# Modules are linked with an additional, architecture-specific linker script.
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/parisc/kernel/module.lds
+endif
+
 OBJCOPY_FLAGS =-O binary -R .note -R .comment -S
 
 cflags-y       := -pipe
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 
-#define MCOUNT_INSN_SIZE 4
-
+#define MCOUNT_ADDR            ((unsigned long)mcount)
+#define MCOUNT_INSN_SIZE       4
+#define CC_USING_NOP_MCOUNT
 extern unsigned long sys_call_table[];
 
 extern unsigned long return_address(unsigned int);
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+/* Assembly entry point for dynamic ftrace (defined with ENTRY_CFI). */
+extern void ftrace_caller(void);
+
+/* No architecture-specific per-record data is kept. */
+struct dyn_arch_ftrace {
+};
+
+/* Converts a recorded call-site address; implemented in the arch ftrace code. */
+unsigned long ftrace_call_adjust(unsigned long addr);
+
+#endif
+
 #define ftrace_return_address(n) return_address(n)
 
 #endif /* __ASSEMBLY__ */
 
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_cache.o = -pg
-CFLAGS_REMOVE_perf.o = -pg
-CFLAGS_REMOVE_unwind.o = -pg
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cache.o =  $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_perf.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
 endif
 
 obj-$(CONFIG_SMP)      += smp.o
 
 #endif
 ENDPROC_CFI(mcount)
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#ifdef CONFIG_64BIT
+#define FTRACE_FRAME_SIZE (2*FRAME_SIZE)
+#else
+#define FTRACE_FRAME_SIZE FRAME_SIZE
+#endif
+/*
+ * ftrace_caller: common entry for dynamic ftrace.
+ *
+ * Works on a frame of FTRACE_FRAME_SIZE bytes located directly below %sp,
+ * whose slot 0 already holds a saved %r1 (see the "Offset 0" note below).
+ * Saves the registers listed below into that frame, calls
+ * ftrace_function_trampoline(), restores the registers, pops the frame
+ * while reloading %r1, and finally branches to %r1 - 4.
+ *
+ * NOTE(review): the frame is presumably pushed by the per-function
+ * trampoline that branches here - confirm against the patching code.
+ */
+ENTRY_CFI(ftrace_caller, caller,frame=FTRACE_FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP)
+ftrace_caller:
+       .global ftrace_caller
+
+       /* Save %r3 in slot 1, then use %r3 as pointer to the frame base */
+       STREG   %r3, -FTRACE_FRAME_SIZE+1*REG_SZ(%sp)
+       ldo     -FTRACE_FRAME_SIZE(%sp), %r3
+       STREG   %rp, -RP_OFFSET(%r3)
+
+       /* Offset 0 is already allocated for %r1 */
+       STREG   %r23, 2*REG_SZ(%r3)
+       STREG   %r24, 3*REG_SZ(%r3)
+       STREG   %r25, 4*REG_SZ(%r3)
+       STREG   %r26, 5*REG_SZ(%r3)
+       STREG   %r28, 6*REG_SZ(%r3)
+       STREG   %r29, 7*REG_SZ(%r3)
+#ifdef CONFIG_64BIT
+       STREG   %r19, 8*REG_SZ(%r3)
+       STREG   %r20, 9*REG_SZ(%r3)
+       STREG   %r21, 10*REG_SZ(%r3)
+       STREG   %r22, 11*REG_SZ(%r3)
+       STREG   %r27, 12*REG_SZ(%r3)
+       STREG   %r31, 13*REG_SZ(%r3)
+       /*
+        * NOTE(review): presumably sets up the global pointer and the
+        * argument pointer needed for calling C code on 64-bit - confirm.
+        */
+       loadgp
+       ldo     -16(%sp),%r29
+#endif
+       /*
+        * Arguments for ftrace_function_trampoline():
+        *   %r26 = %rp
+        *   %r25 = saved %r1 (frame slot 0) minus 8
+        *   %r24 = frame base (%r3), set in the branch delay slot
+        */
+       LDREG   0(%r3), %r25
+       copy    %rp, %r26
+       ldo     -8(%r25), %r25
+       b,l     ftrace_function_trampoline, %rp
+       copy    %r3, %r24
+
+       /* Restore everything saved above; %r3 last, as it is the base */
+       LDREG   -RP_OFFSET(%r3), %rp
+       LDREG   2*REG_SZ(%r3), %r23
+       LDREG   3*REG_SZ(%r3), %r24
+       LDREG   4*REG_SZ(%r3), %r25
+       LDREG   5*REG_SZ(%r3), %r26
+       LDREG   6*REG_SZ(%r3), %r28
+       LDREG   7*REG_SZ(%r3), %r29
+#ifdef CONFIG_64BIT
+       LDREG   8*REG_SZ(%r3), %r19
+       LDREG   9*REG_SZ(%r3), %r20
+       LDREG   10*REG_SZ(%r3), %r21
+       LDREG   11*REG_SZ(%r3), %r22
+       LDREG   12*REG_SZ(%r3), %r27
+       LDREG   13*REG_SZ(%r3), %r31
+#endif
+       LDREG   1*REG_SZ(%r3), %r3
+
+       /* Pop the frame and reload the %r1 value stored in slot 0 */
+       LDREGM  -FTRACE_FRAME_SIZE(%sp), %r1
+       /* Adjust return point to jump back to beginning of traced function */
+       ldo     -4(%r1), %r1
+       bv,n    (%r1)
+
+ENDPROC_CFI(ftrace_caller)
+
+#endif
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        .align 8
 ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE)
 
  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
  *
  * future possible enhancements:
- *     - add CONFIG_DYNAMIC_FTRACE
  *     - add CONFIG_STACK_TRACER
  */
 
 #include <linux/init.h>
 #include <linux/ftrace.h>
+#include <linux/uaccess.h>
 
 #include <asm/assembly.h>
 #include <asm/sections.h>
 #include <asm/ftrace.h>
-
+#include <asm/patch.h>
 
 #define __hot __attribute__ ((__section__ (".text.hot")))
 
                                unsigned long self_addr,
                                unsigned long org_sp_gr3)
 {
-       extern ftrace_func_t ftrace_trace_function;  /* depends on CONFIG_DYNAMIC_FTRACE */
-
-       if (ftrace_trace_function != ftrace_stub) {
-               /* struct ftrace_ops *op, struct pt_regs *regs); */
-               ftrace_trace_function(parent, self_addr, NULL, NULL);
-               return;
-       }
+#ifndef CONFIG_DYNAMIC_FTRACE
+       extern ftrace_func_t ftrace_trace_function;
+#endif
+       if (ftrace_trace_function != ftrace_stub)
+               ftrace_trace_function(self_addr, parent, NULL, NULL);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub ||
 #endif
 }
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* Nothing is patched here for the graph tracer; always returns 0. */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+       return 0;
+}
+
+/* Counterpart of the enable hook; likewise a no-op that returns 0. */
+int ftrace_disable_ftrace_graph_caller(void)
+{
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+/* No architecture-specific initialisation is performed; always returns 0. */
+int __init ftrace_dyn_arch_init(void)
+{
+       return 0;
+}
+/* No code is patched here; @func is not used and 0 is always returned. */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+       return 0;
+}
+
+/*
+ * Returns @addr advanced by FTRACE_PATCHABLE_FUNCTION_SIZE - 1 instruction
+ * words of 4 bytes each.
+ *
+ * NOTE(review): presumably @addr is the start of the patchable NOP area and
+ * the result is its last NOP (the one at the function entry), which is how
+ * rec->ip is used by the patching functions - confirm.
+ */
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+       return addr+(FTRACE_PATCHABLE_FUNCTION_SIZE-1)*4;
+}
+
+/*
+ * Enable tracing for the call site described by @rec by overwriting its
+ * patchable NOP area with a trampoline that loads an address derived from
+ * @addr and branches to it.
+ *
+ * The area is only patched if every word that would be replaced currently
+ * holds INSN_NOP.
+ *
+ * Returns 0 on success, the error from probe_kernel_read() if the existing
+ * instructions cannot be read, or -EINVAL if a non-NOP word is found.
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+       u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE];
+       u32 *tramp;
+       int size, ret, i;
+       void *ip;
+
+#ifdef CONFIG_64BIT
+       /* The trampoline embeds the result of dereferencing @addr */
+       unsigned long addr2 =
+               (unsigned long)dereference_function_descriptor((void *)addr);
+
+       /*
+        * Default layout, used when bit 2 of rec->ip is clear: the 64-bit
+        * address occupies words 3-4, i.e. it starts 8 bytes before rec->ip.
+        */
+       u32 ftrace_trampoline[] = {
+               0x73c10208, /* std,ma r1,100(sp) */
+               0x0c2110c1, /* ldd -10(r1),r1 */
+               0xe820d002, /* bve,n (r1) */
+               addr2 >> 32,
+               addr2 & 0xffffffff,
+               0xe83f1fd7, /* b,l,n .-14,r1 */
+       };
+
+       /*
+        * Layout used when bit 2 of rec->ip is set: the 64-bit address comes
+        * first, i.e. it starts 28 bytes before rec->ip.
+        */
+       u32 ftrace_trampoline_unaligned[] = {
+               addr2 >> 32,
+               addr2 & 0xffffffff,
+               0x37de0200, /* ldo 100(sp),sp */
+               0x73c13e01, /* std r1,-100(sp) */
+               0x34213ff9, /* ldo -4(r1),r1 */
+               0x50213fc1, /* ldd -20(r1),r1 */
+               0xe820d002, /* bve,n (r1) */
+               0xe83f1fcf, /* b,l,n .-20,r1 */
+       };
+
+       /* Neither trampoline may be longer than the reserved NOP area */
+       BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline_unaligned) >
+                               FTRACE_PATCHABLE_FUNCTION_SIZE);
+#else
+       /* 32-bit layout: target address first, branch instruction last */
+       u32 ftrace_trampoline[] = {
+               (u32)addr,
+               0x6fc10080, /* stw,ma r1,40(sp) */
+               0x48213fd1, /* ldw -18(r1),r1 */
+               0xe820c002, /* bv,n r0(r1) */
+               0xe83f1fdf, /* b,l,n .-c,r1 */
+       };
+#endif
+
+       BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline) >
+                               FTRACE_PATCHABLE_FUNCTION_SIZE);
+
+       size = sizeof(ftrace_trampoline);
+       tramp = ftrace_trampoline;
+
+#ifdef CONFIG_64BIT
+       /* Pick the layout that keeps the embedded address 8-byte aligned */
+       if (rec->ip & 0x4) {
+               size = sizeof(ftrace_trampoline_unaligned);
+               tramp = ftrace_trampoline_unaligned;
+       }
+#endif
+
+       /*
+        * The last trampoline word (the branch) is written to rec->ip, so
+        * the patched area starts size - 4 bytes before it.
+        */
+       ip = (void *)(rec->ip + 4 - size);
+
+       ret = probe_kernel_read(insn, ip, size);
+       if (ret)
+               return ret;
+
+       /* Refuse to patch anything that is not still a NOP */
+       for (i = 0; i < size / 4; i++) {
+               if (insn[i] != INSN_NOP)
+                       return -EINVAL;
+       }
+
+       __patch_text_multiple(ip, tramp, size);
+       return 0;
+}
+
+/*
+ * Disable tracing for the call site described by @rec by writing
+ * FTRACE_PATCHABLE_FUNCTION_SIZE NOP words over the area whose last word
+ * is at rec->ip. The current contents are not checked first.
+ *
+ * @mod and @addr are not used. Always returns 0.
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+                   unsigned long addr)
+{
+       u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE];
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(insn); i++)
+               insn[i] = INSN_NOP;
+
+       /* Start address: rec->ip + 4 - sizeof(insn), so the area ends at rec->ip */
+       __patch_text_multiple((void *)rec->ip + 4 - sizeof(insn),
+                             insn, sizeof(insn));
+       return 0;
+}
+#endif
 
        const char *strtab = NULL;
        const Elf_Shdr *s;
        char *secstrings;
+       int err, symindex = -1;
        Elf_Sym *newptr, *oldptr;
        Elf_Shdr *symhdr = NULL;
 #ifdef DEBUG
                if(sechdrs[i].sh_type == SHT_SYMTAB
                   && (sechdrs[i].sh_flags & SHF_ALLOC)) {
                        int strindex = sechdrs[i].sh_link;
+                       symindex = i;
                        /* FIXME: AWFUL HACK
                         * The cast is to drop the const from
                         * the sechdrs pointer */
                if (!strcmp(".altinstructions", secname))
                        /* patch .altinstructions */
                        apply_alternatives(aseg, aseg + s->sh_size, me->name);
-       }
 
+               /* For 32 bit kernels we're compiling modules with
+                * -ffunction-sections so we must relocate the addresses in the
+                * __mcount_loc section.
+                */
+               if (symindex != -1 && !strcmp(secname, "__mcount_loc")) {
+                       /* err is only assigned for SHT_REL/SHT_RELA below;
+                        * give it a defined value so that the check after
+                        * the relocation step never reads an uninitialized
+                        * variable for any other section type.
+                        */
+                       err = 0;
+                       if (s->sh_type == SHT_REL)
+                               err = apply_relocate((Elf_Shdr *)sechdrs,
+                                                       strtab, symindex,
+                                                       s - sechdrs, me);
+                       else if (s->sh_type == SHT_RELA)
+                               err = apply_relocate_add((Elf_Shdr *)sechdrs,
+                                                       strtab, symindex,
+                                                       s - sechdrs, me);
+                       if (err)
+                               return err;
+               }
+       }
        return 0;
 }
 
 
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Collect the __patchable_function_entries input sections into an output
+ * section named __mcount_loc.
+ */
+SECTIONS {
+       __mcount_loc : {
+               *(__patchable_function_entries)
+       }
+}
 
                                *(.data..vm0.pgd) \
                                *(.data..vm0.pte)
 
+#define CC_USING_PATCHABLE_FUNCTION_ENTRY
+
 #include <asm-generic/vmlinux.lds.h>
 
 /* needed for the processor specific cache alignment size */   
 
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
 
-void set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
 {
        unsigned long vaddr = __fix_to_virt(idx);
        pgd_t *pgd = pgd_offset_k(vaddr);
        flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
 }
 
-void clear_fixmap(enum fixed_addresses idx)
+void notrace clear_fixmap(enum fixed_addresses idx)
 {
        unsigned long vaddr = __fix_to_virt(idx);
        pgd_t *pgd = pgd_offset_k(vaddr);
 
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
+/*
+ * MCOUNT_REC() emits the table delimited by __start_mcount_loc and
+ * __stop_mcount_loc. When CC_USING_PATCHABLE_FUNCTION_ENTRY is defined the
+ * table is filled from __patchable_function_entries instead of __mcount_loc.
+ */
+#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
+#define MCOUNT_REC()   . = ALIGN(8);                           \
+                       __start_mcount_loc = .;                 \
+                       KEEP(*(__patchable_function_entries))   \
+                       __stop_mcount_loc = .;
+#else
 #define MCOUNT_REC()   . = ALIGN(8);                           \
                        __start_mcount_loc = .;                 \
                        KEEP(*(__mcount_loc))                   \
                        __stop_mcount_loc = .;
+#endif
 #else
 #define MCOUNT_REC()
 #endif