__get_datapage() is only a few instructions that retrieve the
address of the page where the kernel stores data for the VDSO.
By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair are avoided, plus a few reg moves.
The improvement is noticeable (about 55 nsec/call on an 8xx).
vdsotest before the patch:
gettimeofday:    vdso: 731 nsec/call
clock-gettime-realtime-coarse:    vdso: 668 nsec/call
clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
vdsotest after the patch:
gettimeofday:    vdso: 677 nsec/call
clock-gettime-realtime-coarse:    vdso: 613 nsec/call
clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/c39ef7f3dfa25356b01e211d539671f279086c09.1575273217.git.christophe.leroy@c-s.fr
 
 extern struct vdso_data *vdso_data;
 
+#else /* __ASSEMBLY__ */
+
+.macro get_datapage ptr, tmp           /* ptr = VDSO data page address; clobbers LR and tmp; ptr must not be r0 (addi/lwz would read it as literal 0) */
+       bcl     20, 31, .+4             /* branch-and-link to the next instruction: LR = address of the mflr below */
+       mflr    \ptr                    /* ptr = run-time address of this mflr */
+       addi    \ptr, \ptr, (__kernel_datapage_offset - (.-4))@l       /* (.-4) is the mflr above, so ptr = &__kernel_datapage_offset */
+       lwz     \tmp, 0(\ptr)           /* tmp = word stored at __kernel_datapage_offset */
+       add     \ptr, \tmp, \ptr        /* ptr = that word + its own address, i.e. the word is a self-relative offset */
+.endm
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
 
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 #include <asm/asm-offsets.h>
 
        .text
   .cfi_startproc
        mflr    r12
   .cfi_register lr,r12
-       mr      r11,r3
-       bl      __get_datapage@local
+       get_datapage    r10, r0
        mtlr    r12
-       mr      r10,r3
 
        lwz     r7,CFG_DCACHE_BLOCKSZ(r10)
        addi    r5,r7,-1
-       andc    r6,r11,r5               /* round low to line bdy */
+       andc    r6,r3,r5                /* round low to line bdy */
        subf    r8,r6,r4                /* compute length */
        add     r8,r8,r5                /* ensure we get enough */
        lwz     r9,CFG_DCACHE_LOGBLOCKSZ(r10)
 
        lwz     r7,CFG_ICACHE_BLOCKSZ(r10)
        addi    r5,r7,-1
-       andc    r6,r11,r5               /* round low to line bdy */
+       andc    r6,r3,r5                /* round low to line bdy */
        subf    r8,r6,r4                /* compute length */
        add     r8,r8,r5
        lwz     r9,CFG_ICACHE_LOGBLOCKSZ(r10)
 
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 
        .text
        .global __kernel_datapage_offset;
 __kernel_datapage_offset:
        .long   0
 
-V_FUNCTION_BEGIN(__get_datapage)
-  .cfi_startproc
-       /* We don't want that exposed or overridable as we want other objects
-        * to be able to bl directly to here
-        */
-       .protected __get_datapage
-       .hidden __get_datapage
-
-       mflr    r0
-  .cfi_register lr,r0
-
-       bcl     20,31,data_page_branch
-data_page_branch:
-       mflr    r3
-       mtlr    r0
-       addi    r3, r3, __kernel_datapage_offset-data_page_branch
-       lwz     r0,0(r3)
-  .cfi_restore lr
-       add     r3,r0,r3
-       blr
-  .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
 /*
  * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
  *
        mflr    r12
   .cfi_register lr,r12
        mr      r4,r3
-       bl      __get_datapage@local
+       get_datapage    r3, r0
        mtlr    r12
        addi    r3,r3,CFG_SYSCALL_MAP32
        cmpli   cr0,r4,0
   .cfi_startproc
        mflr    r12
   .cfi_register lr,r12
-       bl      __get_datapage@local
+       get_datapage    r3, r0
        lwz     r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
        lwz     r3,CFG_TB_TICKS_PER_SEC(r3)
        mtlr    r12
 
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 
 
        mr      r10,r3                  /* r10 saves tv */
        mr      r11,r4                  /* r11 saves tz */
-       bl      __get_datapage@local    /* get data page */
-       mr      r9, r3                  /* datapage ptr in r9 */
+       get_datapage    r9, r0
        cmplwi  r10,0                   /* check if tv is NULL */
        beq     3f
        lis     r7,1000000@ha           /* load up USEC_PER_SEC */
        mflr    r12                     /* r12 saves lr */
   .cfi_register lr,r12
        mr      r11,r4                  /* r11 saves tp */
-       bl      __get_datapage@local    /* get data page */
-       mr      r9,r3                   /* datapage ptr in r9 */
+       get_datapage    r9, r0
        lis     r7,NSEC_PER_SEC@h       /* want nanoseconds */
        ori     r7,r7,NSEC_PER_SEC@l
        beq     cr5, .Lcoarse_clocks
 
        mflr    r12
   .cfi_register lr,r12
-       bl      __get_datapage@local    /* get data page */
+       get_datapage    r3, r0
        lwz     r5, CLOCK_HRTIMER_RES(r3)
        mtlr    r12
        li      r3,0
   .cfi_register lr,r12
 
        mr      r11,r3                  /* r11 holds t */
-       bl      __get_datapage@local
-       mr      r9, r3                  /* datapage ptr in r9 */
+       get_datapage    r9, r0
 
        lwz     r3,STAMP_XTIME_SEC+LOPART(r9)