]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
sparc64, vdso: sparc64 vDSO implementation.
authorNick Alcock <nick.alcock@oracle.com>
Mon, 8 Dec 2014 13:32:19 +0000 (13:32 +0000)
committerAllen Pais <allen.pais@oracle.com>
Tue, 15 Sep 2015 12:08:52 +0000 (17:38 +0530)
This commit adds a vDSO similar to that used on x86: in this commit, that vDSO
is empty bar the ELF note used by glibc to verify that it knows about this vDSO.
The vDSO's location is somewhat randomized, so, as a consequence, tends to
randomize the locations of other shared libraries too.  (The randomization
respects /proc/sys/kernel/randomize_va_space.)

It is derived from the implementation in recent kernels, in that it uses a C
generator to translate the vDSO shared library into C code and validate that it
contains no relocations and the like.

Notes for future improvement:

 - There is no support for a vDSO in 32-bit userspace yet.  This is just because
   I want to get the sparc64 version working first: the compat vDSO
   implementation adds significant complexity.

 - The vDSO randomization process is ugly: we are calling get_unmapped_area()
   twice, with a randomization in the middle.  Eventually,
   arch_get_unmapped_area() on SPARC64 should learn about PF_RANDOMIZE, as it
   has on other arches.

Orabug: 20861959
Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
(cherry picked from commit 2da875e6f5781dd196e9f055cd53a3ac0d80aaaa)

20 files changed:
Documentation/kernel-parameters.txt
arch/sparc/Kbuild
arch/sparc/Makefile
arch/sparc/include/asm/elf_64.h
arch/sparc/include/asm/mmu_64.h
arch/sparc/include/asm/page_64.h
arch/sparc/include/asm/thread_info_32.h
arch/sparc/include/asm/thread_info_64.h
arch/sparc/include/asm/vdso.h [new file with mode: 0644]
arch/sparc/include/asm/vvar.h [new file with mode: 0644]
arch/sparc/include/uapi/asm/auxvec.h
arch/sparc/vdso/.gitignore [new file with mode: 0644]
arch/sparc/vdso/Makefile [new file with mode: 0644]
arch/sparc/vdso/checkundef.sh [new file with mode: 0644]
arch/sparc/vdso/vdso-layout.lds.S [new file with mode: 0644]
arch/sparc/vdso/vdso-note.S [new file with mode: 0644]
arch/sparc/vdso/vdso.lds.S [new file with mode: 0644]
arch/sparc/vdso/vdso2c.c [new file with mode: 0644]
arch/sparc/vdso/vdso2c.h [new file with mode: 0644]
arch/sparc/vdso/vma.c [new file with mode: 0644]

index 6726139bd2899038e77ae15f9901351773dd324b..7f17f1f2e684ff621a4160b32f11e734f3885edc 100644 (file)
@@ -3838,7 +3838,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                        HIGHMEM regardless of setting
                                        of CONFIG_HIGHPTE.
 
-       vdso=           [X86,SH]
+       vdso=           [X86,SH,SPARC64]
                        On X86_32, this is an alias for vdso32=.  Otherwise:
 
                        vdso=1: enable VDSO (the default)
index 675afa285ddb75d66d849f2d07188f330711d362..04423ded7525d921945c98cb687591cd99773ad9 100644 (file)
@@ -7,3 +7,4 @@ obj-y += mm/
 obj-y += math-emu/
 obj-y += net/
 obj-y += crypto/
+obj-y += vdso/
index eaee14637d93ec5db5f95931b52cc31d5a8c2108..303a0c8c9b55de22b769353d72e02f21fae71143 100644 (file)
@@ -74,6 +74,10 @@ install:
 archclean:
        $(Q)$(MAKE) $(clean)=$(boot)
 
+PHONY += vdso_install
+vdso_install:
+       $(Q)$(MAKE) $(build)=arch/sparc/vdso $@
+
 # This is the image used for packaging
 KBUILD_IMAGE := $(boot)/zImage
 
index 370ca1e71ffbc8dc2fb4cd9567b44f95cf61eee6..6f41fcc247fa9ad3701185d1aac4b7322f6ad54c 100644 (file)
@@ -209,4 +209,19 @@ do {       if ((ex).e_ident[EI_CLASS] == ELFCLASS32)       \
                        (current->personality & (~PER_MASK)));  \
 } while (0)
 
+extern unsigned int vdso64_enabled;
+
+#define ARCH_DLINFO                                                    \
+do {                                                                   \
+       if (vdso64_enabled)                                             \
+               NEW_AUX_ENT(AT_SYSINFO_EHDR,                            \
+                           (unsigned long)current->mm->context.vdso); \
+} while (0)
+
+struct linux_binprm;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+                                      int uses_interp);
+
 #endif /* !(__ASM_SPARC64_ELF_H) */
index 70067ce184b16a9d91de2737bf804dbb89e94307..fd52dc2d23d90febe7613db57425afc95dd531da 100644 (file)
@@ -95,6 +95,7 @@ typedef struct {
        unsigned long           huge_pte_count;
        struct tsb_config       tsb_block[MM_NUM_TSBS];
        struct hv_tsb_descr     tsb_descr[MM_NUM_TSBS];
+       void                    *vdso;
 } mm_context_t;
 
 #endif /* !__ASSEMBLY__ */
index 8c2a8c937540ffebb206576d06334bb8ba1a2438..e2b779644714a22973fab8bcec294c8cad6b6700 100644 (file)
@@ -151,4 +151,6 @@ extern unsigned long PAGE_OFFSET;
 
 #include <asm-generic/getorder.h>
 
+#define __HAVE_ARCH_GATE_AREA 1
+
 #endif /* _SPARC64_PAGE_H */
index 45bf17cd0e407edc40b901cff255fd89ef43abd6..b2be7dcf9450f20cad4047d00a550d94fdaa6c49 100644 (file)
@@ -65,9 +65,11 @@ struct thread_info {
 #define init_thread_info       (init_thread_union.thread_info)
 #define init_stack             (init_thread_union.stack)
 
+#ifndef BUILD_VDSO
 /* how to get the thread information struct from C */
 register struct thread_info *current_thread_info_reg asm("g6");
 #define current_thread_info()   (current_thread_info_reg)
+#endif
 
 /*
  * thread information allocation
index 3bf3f9605fb31f6cb7030c0d516df9e4a279f979..440f881929f2328146f730ea5d742f7cda1d98df 100644 (file)
@@ -122,9 +122,11 @@ struct thread_info {
 #define init_thread_info       (init_thread_union.thread_info)
 #define init_stack             (init_thread_union.stack)
 
+#ifndef BUILD_VDSO
 /* how to get the thread information struct from C */
 register struct thread_info *current_thread_info_reg asm("g6");
 #define current_thread_info()  (current_thread_info_reg)
+#endif
 
 /* thread information allocation */
 #if PAGE_SHIFT == 13
@@ -225,7 +227,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
  */
 #define TS_RESTORE_SIGMASK     0x0001  /* restore signal mask in do_signal() */
 
-#ifndef __ASSEMBLY__
+#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO)
 #define HAVE_SET_RESTORE_SIGMASK       1
 static inline void set_restore_sigmask(void)
 {
diff --git a/arch/sparc/include/asm/vdso.h b/arch/sparc/include/asm/vdso.h
new file mode 100644 (file)
index 0000000..a290b98
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _ASM_SPARC_VDSO_H
+#define _ASM_SPARC_VDSO_H
+
+struct vdso_image {
+       void *data;
+       unsigned long size;   /* Always a multiple of PAGE_SIZE */
+       long sym_vvar_start;  /* Negative offset to the vvar area */
+};
+
+#ifdef CONFIG_SPARC64
+extern const struct vdso_image vdso_image_64_builtin;
+#endif
+
+#endif /* _ASM_SPARC_VDSO_H */
diff --git a/arch/sparc/include/asm/vvar.h b/arch/sparc/include/asm/vvar.h
new file mode 100644 (file)
index 0000000..ca2da3b
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _ASM_SPARC_VVAR_DATA_H
+#define _ASM_SPARC_VVAR_DATA_H
+
+struct vvar_data {
+};
+
+extern struct vvar_data *vvar_data;
+
+#endif /* _ASM_SPARC_VVAR_DATA_H */
index ad6f360261f609f23250b8f5a8ca0b66f68eb2b3..fb96d77a82d038e2bc5894a881d2d21490a098d0 100644 (file)
@@ -1,4 +1,6 @@
 #ifndef __ASMSPARC_AUXVEC_H
 #define __ASMSPARC_AUXVEC_H
 
+#define AT_SYSINFO_EHDR                33
+
 #endif /* !(__ASMSPARC_AUXVEC_H) */
diff --git a/arch/sparc/vdso/.gitignore b/arch/sparc/vdso/.gitignore
new file mode 100644 (file)
index 0000000..ef925b9
--- /dev/null
@@ -0,0 +1,3 @@
+vdso.lds
+vdso-image-*.c
+vdso2c
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
new file mode 100644 (file)
index 0000000..dd12809
--- /dev/null
@@ -0,0 +1,114 @@
+#
+# Building vDSO images for sparc64.
+#
+
+KBUILD_CFLAGS += $(DISABLE_LTO)
+
+VDSO64-$(CONFIG_SPARC64)       := y
+
+# files to link into the vdso
+vobjs-y := vdso-note.o
+
+# files to link into kernel
+obj-y                          += vma.o
+
+# vDSO images to build
+vdso_img-$(VDSO64-y)           += 64
+
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.lds $(vobjs-y)
+
+# Build the vDSO image C files and link them in.
+vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
+vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
+vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
+obj-y += $(vdso_img_objs)
+targets += $(vdso_img_cfiles)
+targets += $(vdso_img_sodbg)
+.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
+       $(vdso_img-y:%=$(obj)/vdso%.so)
+
+export CPPFLAGS_vdso.lds += -P -C
+
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
+                       -Wl,--no-undefined \
+                       -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \
+                       $(DISABLE_LTO)
+
+$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+       $(call if_changed,vdso)
+
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+hostprogs-y                    += vdso2c
+
+quiet_cmd_vdso2c = VDSO2C  $@
+define cmd_vdso2c
+       $(obj)/vdso2c $< $(<:%.dbg=%) $@
+endef
+
+$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
+       $(call if_changed,vdso2c)
+
+#
+# Don't omit frame pointers for ease of userspace debugging, but do
+# optimize sibling calls.
+#
+CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables \
+       -m64 -ffixed-g2 -ffixed-g3 -fcall-used-g4 -fcall-used-g5 -ffixed-g6 \
+       -ffixed-g7 $(filter -g%,$(KBUILD_CFLAGS)) \
+       $(call cc-option, -fno-stack-protector) -fno-omit-frame-pointer \
+       -foptimize-sibling-calls -DBUILD_VDSO
+
+$(vobjs): KBUILD_CFLAGS += $(CFL)
+
+#
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+#
+CFLAGS_REMOVE_vdso-note.o = -pg
+
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg
+       $(call if_changed,objcopy)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO    $@
+      cmd_vdso = $(CC) -nostdlib -o $@ \
+                      $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+                      -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
+                sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
+
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+       $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic
+GCOV_PROFILE := n
+
+#
+# Install the unstripped copies of vdso*.so.  If our toolchain supports
+# build-id, install .build-id links as well.
+#
+quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
+define cmd_vdso_install
+       cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
+       if readelf -n $< |grep -q 'Build ID'; then \
+         buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
+         first=`echo $$buildid | cut -b-2`; \
+         last=`echo $$buildid | cut -b3-`; \
+         mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
+         ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
+       fi
+endef
+
+vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
+
+$(MODLIB)/vdso: FORCE
+       @mkdir -p $(MODLIB)/vdso
+
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
+       $(call cmd,vdso_install)
+
+PHONY += vdso_install $(vdso_img_insttargets)
+vdso_install: $(vdso_img_insttargets) FORCE
diff --git a/arch/sparc/vdso/checkundef.sh b/arch/sparc/vdso/checkundef.sh
new file mode 100644 (file)
index 0000000..7ee90a9
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh
+nm="$1"
+file="$2"
+$nm "$file" | grep '^ *U' > /dev/null 2>&1
+if [ $? -eq 1 ]; then
+    exit 0
+else
+    echo "$file: undefined symbols found" >&2
+    exit 1
+fi
diff --git a/arch/sparc/vdso/vdso-layout.lds.S b/arch/sparc/vdso/vdso-layout.lds.S
new file mode 100644 (file)
index 0000000..02bd440
--- /dev/null
@@ -0,0 +1,100 @@
+#include <asm/page.h>
+
+/*
+ * Linker script for vDSO.  This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+#if defined(BUILD_VDSO64)
+# define SHDR_SIZE 64
+#elif defined(BUILD_VDSO32)
+# define SHDR_SIZE 40
+#else
+# error unknown VDSO target
+#endif
+
+#define NUM_FAKE_SHDRS 7
+
+SECTIONS
+{
+       /*
+        * User/kernel shared data is before the vDSO.  This may be a little
+        * uglier than putting it after the vDSO, but it avoids issues with
+        * non-allocatable things that dangle past the end of the PT_LOAD
+        * segment.
+        */
+
+       vvar_start = . -PAGE_SIZE;
+       vvar_data = vvar_start;
+
+       . = SIZEOF_HEADERS;
+
+       .hash           : { *(.hash) }                  :text
+       .gnu.hash       : { *(.gnu.hash) }
+       .dynsym         : { *(.dynsym) }
+       .dynstr         : { *(.dynstr) }
+       .gnu.version    : { *(.gnu.version) }
+       .gnu.version_d  : { *(.gnu.version_d) }
+       .gnu.version_r  : { *(.gnu.version_r) }
+
+       .dynamic        : { *(.dynamic) }               :text   :dynamic
+
+       .rodata         : {
+               *(.rodata*)
+               *(.data*)
+               *(.sdata*)
+               *(.got.plt) *(.got)
+               *(.gnu.linkonce.d.*)
+               *(.bss*)
+               *(.dynbss*)
+               *(.gnu.linkonce.b.*)
+
+               /*
+                * Ideally this would live in a C file: kept in here for
+                * compatibility with x86-64.
+                */
+               VDSO_FAKE_SECTION_TABLE_START = .;
+               . = . + NUM_FAKE_SHDRS * SHDR_SIZE;
+               VDSO_FAKE_SECTION_TABLE_END = .;
+       }                                               :text
+
+       .fake_shstrtab  : { *(.fake_shstrtab) }         :text
+
+
+       .note           : { *(.note.*) }                :text   :note
+
+       .eh_frame_hdr   : { *(.eh_frame_hdr) }          :text   :eh_frame_hdr
+       .eh_frame       : { KEEP (*(.eh_frame)) }       :text
+
+
+       /*
+        * Text is well-separated from actual data: there's plenty of
+        * stuff that isn't used at runtime in between.
+        */
+
+       .text           : { *(.text*) }                 :text   =0x90909090,
+
+       /DISCARD/ : {
+               *(.discard)
+               *(.discard.*)
+               *(__bug_table)
+       }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME        0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+       text            PT_LOAD         FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+       dynamic         PT_DYNAMIC      FLAGS(4);               /* PF_R */
+       note            PT_NOTE         FLAGS(4);               /* PF_R */
+       eh_frame_hdr    PT_GNU_EH_FRAME;
+}
diff --git a/arch/sparc/vdso/vdso-note.S b/arch/sparc/vdso/vdso-note.S
new file mode 100644 (file)
index 0000000..79a071e
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+       .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/sparc/vdso/vdso.lds.S b/arch/sparc/vdso/vdso.lds.S
new file mode 100644 (file)
index 0000000..773ba8a
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#define BUILD_VDSO64
+
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+       LINUX_2.6 {
+       local: *;
+       };
+}
diff --git a/arch/sparc/vdso/vdso2c.c b/arch/sparc/vdso/vdso2c.c
new file mode 100644 (file)
index 0000000..3e0441c
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * vdso2c - A vdso image preparation tool
+ * Copyright (c) 2014 Andy Lutomirski and others
+ * Licensed under the GPL v2
+ *
+ * vdso2c requires stripped and unstripped input.  It would be trivial
+ * to fully strip the input in here, but, for reasons described below,
+ * we need to write a section table.  Doing this is more or less
+ * equivalent to dropping all non-allocatable sections, but it's
+ * easier to let objcopy handle that instead of doing it ourselves.
+ * If we ever need to do something fancier than what objcopy provides,
+ * it would be straightforward to add here.
+ *
+ * We keep a section table for a few reasons:
+ *
+ * Binutils has issues debugging the vDSO: it reads the section table to
+ * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
+ * would break build-id if we removed the section table.  Binutils
+ * also requires that shstrndx != 0.  See:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
+ *
+ * elfutils might not look for PT_NOTE if there is a section table at
+ * all.  I don't know whether this matters for any practical purpose.
+ *
+ * For simplicity, rather than hacking up a partial section table, we
+ * just write a mostly complete one.  We omit non-dynamic symbols,
+ * though, since they're rather large.
+ *
+ * Once binutils gets fixed, we might be able to drop this for all but
+ * the 64-bit vdso, since build-id only works in kernel RPMs, and
+ * systems that update to new enough kernel RPMs will likely update
+ * binutils in sync.  build-id has never worked for home-built kernel
+ * RPMs without manual symlinking, and I suspect that no one ever does
+ * that.
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <err.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <tools/be_byteshift.h>
+
+#include <linux/elf.h>
+#include <linux/types.h>
+
+const char *outfilename;
+
+/* Symbols that we need in vdso2c. */
+enum {
+       sym_vvar_start,
+       sym_VDSO_FAKE_SECTION_TABLE_START,
+       sym_VDSO_FAKE_SECTION_TABLE_END,
+};
+
+struct vdso_sym {
+       const char *name;
+       int export;
+};
+
+struct vdso_sym required_syms[] = {
+       [sym_vvar_start] = {"vvar_start", 1},
+       [sym_VDSO_FAKE_SECTION_TABLE_START] = {
+               "VDSO_FAKE_SECTION_TABLE_START", 0
+       },
+       [sym_VDSO_FAKE_SECTION_TABLE_END] = {
+               "VDSO_FAKE_SECTION_TABLE_END", 0
+       },
+};
+
+__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
+static void fail(const char *format, ...)
+{
+       va_list ap;
+
+       va_start(ap, format);
+       fprintf(stderr, "Error: ");
+       vfprintf(stderr, format, ap);
+       /*
+        * Clean up the varargs state before exiting: the old placement of
+        * va_end() after exit(1) was unreachable dead code.
+        */
+       va_end(ap);
+       if (outfilename)
+               unlink(outfilename);
+       exit(1);
+}
+
+/*
+ * Evil macros for big-endian reads and writes
+ */
+#define GBE(x, bits, ifnot)                                            \
+       __builtin_choose_expr(                                          \
+               (sizeof(*(x)) == bits/8),                               \
+               (__typeof__(*(x)))get_unaligned_be##bits(x), ifnot)
+
+extern void bad_get_be(void);
+#define LAST_GBE(x)                                                    \
+       __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_be())
+
+#define GET_BE(x)                                                      \
+       GBE(x, 64, GBE(x, 32, GBE(x, 16, LAST_GBE(x))))
+
+#define PBE(x, val, bits, ifnot)                                       \
+       __builtin_choose_expr(                                          \
+               (sizeof(*(x)) == bits/8),                               \
+               put_unaligned_be##bits((val), (x)), ifnot)
+
+extern void bad_put_be(void);
+#define LAST_PBE(x, val)                                               \
+       __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_be())
+
+#define PUT_BE(x, val)                                 \
+       PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val))))
+
+#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
+
+#define BITSFUNC3(name, bits, suffix) name##bits##suffix
+#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
+#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
+
+#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+#define ELF_BITS 64
+#include "vdso2c.h"
+#undef ELF_BITS
+
+/* Dispatch on the ELF class of the raw input; only 64-bit is supported. */
+static void go(void *raw_addr, size_t raw_len,
+              void *stripped_addr, size_t stripped_len,
+              FILE *outfile, const char *name)
+{
+       const Elf64_Ehdr *hdr = raw_addr;
+
+       if (hdr->e_ident[EI_CLASS] != ELFCLASS64)
+               fail("unknown ELF class\n");
+
+       go64(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
+}
+
+static void map_input(const char *name, void **addr, size_t *len, int prot)
+{
+       off_t tmp_len;
+
+       int fd = open(name, O_RDONLY);
+       if (fd == -1)
+               err(1, "%s", name);
+
+       tmp_len = lseek(fd, 0, SEEK_END);
+       if (tmp_len == (off_t)-1)
+               err(1, "lseek");
+       *len = (size_t)tmp_len;
+
+       *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
+       if (*addr == MAP_FAILED)
+               err(1, "mmap");
+
+       close(fd);
+}
+
+int main(int argc, char **argv)
+{
+       size_t raw_len, stripped_len;
+       void *raw_addr, *stripped_addr;
+       FILE *outfile;
+       char *name, *tmp;
+       int namelen;
+
+       if (argc != 4) {
+               printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
+               return 1;
+       }
+
+       /*
+        * Figure out the struct name.  If we're writing to a .so file,
+        * generate raw output instead.
+        */
+       name = strdup(argv[3]);
+       if (!name)
+               err(1, "strdup");
+       namelen = strlen(name);
+       if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
+               name = NULL;
+       } else {
+               tmp = strrchr(name, '/');
+               if (tmp)
+                       name = tmp + 1;
+               tmp = strchr(name, '.');
+               if (tmp)
+                       *tmp = '\0';
+               for (tmp = name; *tmp; tmp++)
+                       if (*tmp == '-')
+                               *tmp = '_';
+       }
+
+       map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
+       map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
+
+       outfilename = argv[3];
+       outfile = fopen(outfilename, "w");
+       if (!outfile)
+               err(1, "%s", outfilename);     /* was argv[2]: wrong file named */
+
+       go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
+
+       munmap(raw_addr, raw_len);
+       munmap(stripped_addr, stripped_len);
+       fclose(outfile);
+
+       return 0;
+}
diff --git a/arch/sparc/vdso/vdso2c.h b/arch/sparc/vdso/vdso2c.h
new file mode 100644 (file)
index 0000000..6e4068e
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * This file is included up to twice from vdso2c.c.  It generates code for
+ * 32-bit and 64-bit vDSOs.  We will eventually need both for 64-bit builds,
+ * since 32-bit vDSOs will then be built for 32-bit userspace.
+ */
+
+static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
+                        void *stripped_addr, size_t stripped_len,
+                        FILE *outfile, const char *name)
+{
+       int found_load = 0;
+       unsigned long load_size = -1;  /* Work around bogus warning */
+       unsigned long mapping_size;
+       ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
+       int i;
+       unsigned long j;
+       ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr;
+       ELF(Dyn) *dyn = 0, *dyn_end = 0;
+       const char *secstrings;
+       INT_BITS syms[NSYMS] = {};
+
+       ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff));
+
+       /* Walk the segment table. */
+       for (i = 0; i < GET_BE(&hdr->e_phnum); i++) {
+               if (GET_BE(&pt[i].p_type) == PT_LOAD) {
+                       if (found_load)
+                               fail("multiple PT_LOAD segs\n");
+
+                       if (GET_BE(&pt[i].p_offset) != 0 ||
+                           GET_BE(&pt[i].p_vaddr) != 0)
+                               fail("PT_LOAD in wrong place\n");
+
+                       if (GET_BE(&pt[i].p_memsz) != GET_BE(&pt[i].p_filesz))
+                               fail("cannot handle memsz != filesz\n");
+
+                       load_size = GET_BE(&pt[i].p_memsz);
+                       found_load = 1;
+               } else if (GET_BE(&pt[i].p_type) == PT_DYNAMIC) {
+                       dyn = raw_addr + GET_BE(&pt[i].p_offset);
+                       dyn_end = raw_addr + GET_BE(&pt[i].p_offset) +
+                               GET_BE(&pt[i].p_memsz);
+               }
+       }
+       if (!found_load)
+               fail("no PT_LOAD seg\n");
+
+       if (stripped_len < load_size)
+               fail("stripped input is too short\n");
+
+       /* Walk the dynamic table */
+       for (i = 0; dyn + i < dyn_end &&
+                    GET_BE(&dyn[i].d_tag) != DT_NULL; i++) {
+               typeof(dyn[i].d_tag) tag = GET_BE(&dyn[i].d_tag);
+               typeof(dyn[i].d_un.d_val) val = GET_BE(&dyn[i].d_un.d_val);
+               if ((tag == DT_RELSZ || tag == DT_RELASZ) && (val != 0))
+                       fail("vdso image contains dynamic relocations\n");
+       }
+
+       /* Walk the section table */
+       secstrings_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
+               GET_BE(&hdr->e_shentsize)*GET_BE(&hdr->e_shstrndx);
+       secstrings = raw_addr + GET_BE(&secstrings_hdr->sh_offset);
+       for (i = 0; i < GET_BE(&hdr->e_shnum); i++) {
+               ELF(Shdr) *sh = raw_addr + GET_BE(&hdr->e_shoff) +
+                       GET_BE(&hdr->e_shentsize) * i;
+               if (GET_BE(&sh->sh_type) == SHT_SYMTAB)
+                       symtab_hdr = sh;
+       }
+
+       if (!symtab_hdr)
+               fail("no symbol table\n");
+
+       strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
+               GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link);
+
+       /* Walk the symbol table */
+       for (i = 0;
+            i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize);
+            i++) {
+               int k;
+               ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) +
+                       GET_BE(&symtab_hdr->sh_entsize) * i;
+               const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) +
+                       GET_BE(&sym->st_name);
+
+               for (k = 0; k < NSYMS; k++) {
+                       if (!strcmp(name, required_syms[k].name)) {
+                               if (syms[k]) {
+                                       fail("duplicate symbol %s\n",
+                                            required_syms[k].name);
+                               }
+
+                               /*
+                                * Careful: we use negative addresses, but
+                                * st_value is unsigned, so we rely
+                                * on syms[k] being a signed type of the
+                                * correct width.
+                                */
+                               syms[k] = GET_BE(&sym->st_value);
+                       }
+               }
+       }
+
+       /* Validate mapping addresses. */
+       if (syms[sym_vvar_start] % 8192)
+               fail("vvar_begin must be a multiple of 8192\n");
+
+       if (!name) {
+               fwrite(stripped_addr, stripped_len, 1, outfile);
+               return;
+       }
+
+       mapping_size = (stripped_len + 8191) / 8192 * 8192;
+
+       fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
+       fprintf(outfile, "#include <asm/vdso.h>\n");
+       fprintf(outfile, "\n");
+       fprintf(outfile,
+               "static unsigned char raw_data[%lu] = {",
+               mapping_size);
+       for (j = 0; j < stripped_len; j++) {
+               if (j % 10 == 0)
+                       fprintf(outfile, "\n\t");
+               fprintf(outfile, "0x%02X, ",
+                       (int)((unsigned char *)stripped_addr)[j]);
+       }
+       fprintf(outfile, "\n};\n\n");
+
+       fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name);
+       fprintf(outfile, "\t.data = raw_data,\n");
+       fprintf(outfile, "\t.size = %lu,\n", mapping_size);
+       for (i = 0; i < NSYMS; i++) {
+               if (required_syms[i].export && syms[i])
+                       fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
+                               required_syms[i].name, (int64_t)syms[i]);
+       }
+       fprintf(outfile, "};\n");
+}
diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
new file mode 100644 (file)
index 0000000..b49a527
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ * Set up the VMAs to tell the VM about the vDSO.
+ * Copyright 2007 Andi Kleen, SUSE Labs.
+ * Subject to the GPL, v.2
+ */
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/random.h>
+#include <linux/elf.h>
+#include <asm/vdso.h>
+#include <asm/vvar.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SPARC64
+unsigned int __read_mostly vdso64_enabled = 1;
+#endif
+
+static struct page **vdso_pages, **vvar_page;
+static struct vdso_image vdso_image_64;
+static struct vm_special_mapping vvar_mapping = {
+       .name = "[vvar]"
+};
+static struct vm_special_mapping vdso_mapping = {
+       .name = "[vdso]"
+};
+struct vvar_data *vvar_data;
+
+/*
+ * Allocate pages for the vdso and vvar, and copy in the vdso text from the
+ * kernel image.  Returns 0 on success; on allocation failure the vdso is
+ * disabled (vdso64_enabled = 0) and -ENOMEM is returned.
+ *
+ * NOTE(review): the oom path does not free pages or arrays allocated
+ * before the failure.  This runs once at boot from an __init context, so
+ * the leak is bounded -- confirm this is intentional.
+ */
+int __init init_vdso_image(struct vdso_image *image)
+{
+       int i;
+       int npages = (image->size) / PAGE_SIZE;
+       struct page *p;
+
+       /*
+        * First, the vdso text.  This is initialized data, an integral
+        * number of pages long.
+        */
+       BUG_ON(image->size % PAGE_SIZE != 0);
+
+       vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
+       vdso_mapping.pages = vdso_pages;
+
+       if (!vdso_pages)
+               goto oom;
+
+       for (i = 0; i < npages; i++) {
+               p = alloc_page(GFP_KERNEL);
+               if (!p)
+                       goto oom;
+
+               vdso_pages[i] = p;
+               /* Copy one page of vdso text out of the kernel image. */
+               copy_page(page_address(p), image->data + i * PAGE_SIZE);
+       }
+
+       /*
+        * Now the vvar page.  This is uninitialized data.
+        */
+
+       npages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1;
+       BUG_ON(npages != 1);    /* struct vvar_data must fit in one page */
+       vvar_page = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
+       vvar_mapping.pages = vvar_page;
+
+       if (!vvar_page)
+               goto oom;
+
+       p = alloc_page(GFP_KERNEL);
+       if (!p)
+               goto oom;
+
+       vvar_page[0] = p;
+       vvar_data = page_address(p);
+       memset(vvar_data, 0, PAGE_SIZE);
+
+       return 0;
+ oom:
+       printk(KERN_WARNING "Cannot allocate vdso\n");
+       vdso64_enabled = 0;
+       return -ENOMEM;
+}
+
+#ifdef CONFIG_SPARC64
+static int __init init_vdso(void)
+{
+       memcpy(&vdso_image_64, &vdso_image_64_builtin,
+              sizeof(struct vdso_image));
+       return init_vdso_image(&vdso_image_64);
+}
+subsys_initcall(init_vdso);
+#endif
+
+struct linux_binprm;
+
+/*
+ * Shuffle the vdso up a bit, randomly: returns 'start' plus a random
+ * page-aligned offset of up to PTRS_PER_PTE pages.
+ *
+ * NOTE(review): the 'len' parameter is currently unused; the randomized
+ * offset is not bounded by the mapping size -- confirm against the
+ * follow-up get_unmapped_area() call in map_vdso().
+ */
+static unsigned long vdso_addr(unsigned long start, unsigned len)
+{
+       unsigned offset;
+
+       /* This loses some more bits than a modulo, but is cheaper */
+       offset = get_random_int() & (PTRS_PER_PTE - 1);
+       return start + (offset << PAGE_SHIFT);
+}
+
+/*
+ * Map the vdso text and the preceding vvar page into the current process.
+ * Returns 0 on success or a negative errno; on failure the advertised
+ * mm->context.vdso pointer is cleared again.
+ */
+static int map_vdso(const struct vdso_image *image)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long text_start, addr = 0;
+       int ret = 0;
+
+       down_write(&mm->mmap_sem);
+
+       /*
+        * First, get an unmapped region: then randomize it, and make sure that
+        * region is free.
+        */
+       if (current->flags & PF_RANDOMIZE) {
+               addr = get_unmapped_area(NULL, 0,
+                                        image->size - image->sym_vvar_start,
+                                        0, 0);
+               if (IS_ERR_VALUE(addr)) {
+                       ret = addr;
+                       goto up_fail;
+               }
+               addr = vdso_addr(addr, image->size - image->sym_vvar_start);
+       }
+       addr = get_unmapped_area(NULL, addr,
+                                image->size - image->sym_vvar_start, 0, 0);
+       if (IS_ERR_VALUE(addr)) {
+               ret = addr;
+               goto up_fail;
+       }
+
+       /* sym_vvar_start is negative: the vvar page precedes the text. */
+       text_start = addr - image->sym_vvar_start;
+       current->mm->context.vdso = (void __user *)text_start;
+
+       /*
+        * MAYWRITE to allow gdb to COW and set breakpoints
+        */
+       vma = _install_special_mapping(mm,
+                                      text_start,
+                                      image->size,
+                                      VM_READ|VM_EXEC|
+                                      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+                                      &vdso_mapping);
+
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               goto up_fail;
+       }
+
+       vma = _install_special_mapping(mm,
+                                      addr,
+                                      -image->sym_vvar_start,
+                                      VM_READ|VM_MAYREAD,
+                                      &vvar_mapping);
+
+       /*
+        * On failure we simply fall through to the cleanup below; the old
+        * extra "if (ret) goto up_fail;" before the label was dead code.
+        */
+       if (IS_ERR(vma))
+               ret = PTR_ERR(vma);
+
+up_fail:
+       if (ret)
+               current->mm->context.vdso = NULL;
+
+       up_write(&mm->mmap_sem);
+       return ret;
+}
+
+#ifdef CONFIG_SPARC64
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+       if (!vdso64_enabled)
+               return 0;
+
+       return map_vdso(&vdso_image_64);
+}
+
+static __init int vdso_setup(char *s)
+{
+       vdso64_enabled = simple_strtoul(s, NULL, 0);
+       return 0;
+}
+__setup("vdso=", vdso_setup);
+#endif
+
+/*
+ * SPARC doesn't need a gate area, since we have no (obsolete) vsyscall page nor
+ * anything else at a similar fixed address.  However, kernels pre-3.17 assume
+ * that anything with an AT_SYSINFO_EHDR also has a gate area (because
+ * historically the gate area came first): we have to explicitly disable it.
+ */
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+       return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+       return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+       return NULL;
+}