--- /dev/null
+/*
+ * Written by Dave Hansen <dave.hansen@intel.com>
+ */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+#include "mpx-hw.h"
+
+/*
+ * Base virtual address of the bounds directory being inspected.  Set by
+ * inspect_me() from its argument, or parsed from argv[2] by main() when
+ * built with MPX_DIG_REMOTE.
+ */
+unsigned long bounds_dir_global;
+
+/*
+ * mpx_dig_abort() - report the call site and terminate via abort().
+ *
+ * The message goes to both stderr and stdout so that it shows up whichever
+ * stream is being captured.  stdout is flushed explicitly because abort()
+ * is not guaranteed to flush buffered stdio output.
+ */
+#define mpx_dig_abort()        __mpx_dig_abort(__FILE__, __func__, __LINE__)
+static inline void __mpx_dig_abort(const char *file, const char *func, int line)
+{
+       fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
+       printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
+       fflush(stdout);
+       abort();
+}
+
+/*
+ * Run like this (BDIR locates what is probably the bounds directory):
+ *
+ *     BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
+ *             | head -1 | awk -F- '{print $1}')";
+ *     ./mpx-dig $pid 0x$BDIR
+ *
+ * NOTE:
+ *     assumes that the only 2097152-kb VMA is the bounds dir
+ */
+
+/*
+ * nr_incore() - count the resident pages in [ptr, ptr + size_bytes).
+ *
+ * @ptr:        start of the region, passed straight to mincore()
+ * @size_bytes: length of the region in bytes
+ *
+ * Returns the number of pages mincore() reports as resident.  Aborts via
+ * mpx_dig_abort() if the status vector cannot be allocated or if mincore()
+ * fails.
+ */
+long nr_incore(void *ptr, unsigned long size_bytes)
+{
+       long i;
+       long ret = 0;
+       /*
+        * mincore() fills one byte per page and rounds the length *up* to a
+        * whole page, so the vector has to be sized the same way.
+        */
+       long vec_len = (size_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+       /* never ask for zero bytes: malloc(0) may legitimately return NULL */
+       unsigned char *vec = malloc(vec_len ? vec_len : 1);
+       int incore_ret;
+
+       if (!vec)
+               mpx_dig_abort();
+
+       incore_ret = mincore(ptr, size_bytes, vec);
+       if (incore_ret) {
+               printf("mincore ret: %d\n", incore_ret);
+               perror("mincore");
+               mpx_dig_abort();
+       }
+       /* only bit 0 of each byte is defined as the residency flag */
+       for (i = 0; i < vec_len; i++)
+               ret += vec[i] & 1;
+       free(vec);
+       return ret;
+}
+
+/*
+ * open_proc() - open /proc/<pid>/<file> read-only.
+ *
+ * Returns whatever open() returned.  When that is negative, the path is
+ * reported through perror() before the value is handed back.
+ */
+int open_proc(int pid, char *file)
+{
+       static char path[100];
+       int proc_fd;
+
+       snprintf(path, sizeof(path), "/proc/%d/%s", pid, file);
+       proc_fd = open(path, O_RDONLY);
+       if (proc_fd >= 0)
+               return proc_fd;
+
+       perror(path);
+       return proc_fd;
+}
+
+/* One mapping parsed from /proc/<pid>/maps: the half-open range [start, end). */
+struct vaddr_range {
+       unsigned long start;
+       unsigned long end;
+};
+/* Array of known mappings; allocated and grown by pid_load_vaddrs(). */
+struct vaddr_range *ranges;
+/* Number of elements 'ranges' has room for. */
+int nr_ranges_allocated;
+/* Number of elements of 'ranges' filled in by __pid_load_vaddrs(). */
+int nr_ranges_populated;
+/* Index of the range that satisfied the last lookup, or -1 for none. */
+int last_range = -1;
+
+/*
+ * __pid_load_vaddrs() - fill 'ranges' from /proc/<pid>/maps.
+ *
+ * Returns 0 when every line was stored, or -E2BIG when 'ranges' has fewer
+ * slots (nr_ranges_allocated) than the file has lines; the caller is
+ * expected to grow the array and call again.  Aborts via mpx_dig_abort()
+ * when the file cannot be opened, read or parsed.
+ */
+int __pid_load_vaddrs(int pid)
+{
+       int ret = 0;
+       int proc_maps_fd = open_proc(pid, "maps");
+       char linebuf[10000];
+       unsigned long start;
+       unsigned long end;
+       char rest[1000];
+       FILE *f;
+
+       if (proc_maps_fd < 0)
+               mpx_dig_abort();
+       f = fdopen(proc_maps_fd, "r");
+       if (!f)
+               mpx_dig_abort();
+       nr_ranges_populated = 0;
+       while (fgets(linebuf, sizeof(linebuf), f)) {
+               int parsed;
+
+               /* width-limit %s so a long token cannot overrun rest[] */
+               parsed = sscanf(linebuf, "%lx-%lx%999s", &start, &end, rest);
+               if (parsed != 3)
+                       mpx_dig_abort();
+
+               dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
+               if (nr_ranges_populated >= nr_ranges_allocated) {
+                       ret = -E2BIG;
+                       break;
+               }
+               ranges[nr_ranges_populated].start = start;
+               ranges[nr_ranges_populated].end = end;
+               nr_ranges_populated++;
+       }
+       /* fgets() returning NULL for any reason other than EOF is fatal */
+       if (ferror(f))
+               mpx_dig_abort();
+       last_range = -1;
+       /*
+        * fclose() also closes the descriptor handed to fdopen(); calling
+        * close() on it afterwards would hit an already-released (and
+        * possibly reused) descriptor number.
+        */
+       fclose(f);
+       return ret;
+}
+
+/*
+ * pid_load_vaddrs() - load the mapping list of @pid into 'ranges'.
+ *
+ * Allocates 'ranges' with room for four entries on first use, then keeps
+ * calling __pid_load_vaddrs(), doubling the array every time it reports
+ * -E2BIG, until a call returns 0.  That 0 is the return value.
+ */
+int pid_load_vaddrs(int pid)
+{
+       int ret;
+
+       dprintf2("%s(%d)\n", __func__, pid);
+       if (ranges == NULL) {
+               nr_ranges_allocated = 4;
+               ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
+               dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
+                        nr_ranges_allocated, ranges);
+               assert(ranges != NULL);
+       }
+
+       while ((ret = __pid_load_vaddrs(pid)) != 0) {
+               /* anything other than "array too small" is simply retried */
+               if (ret != -E2BIG)
+                       continue;
+
+               dprintf2("%s(%d) need to realloc\n", __func__, pid);
+               nr_ranges_allocated *= 2;
+               ranges = realloc(ranges,
+                               nr_ranges_allocated * sizeof(ranges[0]));
+               dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
+                               pid, nr_ranges_allocated, ranges);
+               assert(ranges != NULL);
+               dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
+       }
+
+       dprintf2("%s(%d) done\n", __func__, pid);
+
+       return ret;
+}
+
+/* Returns 1 when @vaddr lies in the half-open range [r->start, r->end), else 0. */
+static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
+{
+       return (r->start <= vaddr && vaddr < r->end) ? 1 : 0;
+}
+
+/*
+ * vaddr_mapped_by_range() - is @vaddr covered by any loaded mapping?
+ *
+ * Tries the range that matched last time first, then scans every populated
+ * entry of 'ranges'.  Returns 1 and remembers the matching index in
+ * 'last_range' on a hit, 0 when no range contains @vaddr.
+ */
+static inline int vaddr_mapped_by_range(unsigned long vaddr)
+{
+       int i;
+
+       /* index 0 is a valid cache slot; only -1 means "nothing cached" */
+       if (last_range >= 0 && last_range < nr_ranges_populated &&
+           vaddr_in_range(vaddr, &ranges[last_range]))
+               return 1;
+
+       for (i = 0; i < nr_ranges_populated; i++) {
+               /* skip the ranges that do NOT contain the address */
+               if (!vaddr_in_range(vaddr, &ranges[i]))
+                       continue;
+               last_range = i;
+               return 1;
+       }
+       return 0;
+}
+
+/* Size in bytes of one bounds table entry: four unsigned longs. */
+const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
+
+/*
+ * read_bounds_table_into_buf() - get a readable pointer to a bounds table.
+ *
+ * Without MPX_DIG_STANDALONE the table address is simply cast to a pointer
+ * and returned.  With MPX_DIG_STANDALONE the table is read from a file
+ * descriptor into a static buffer, which is returned; any short seek or
+ * read aborts.
+ *
+ * NOTE(review): the standalone branch uses an identifier 'fd' that is not
+ * declared anywhere in the visible source -- presumably proc_pid_mem_fd was
+ * intended; confirm before enabling MPX_DIG_STANDALONE.
+ */
+void *read_bounds_table_into_buf(unsigned long table_vaddr)
+{
+#ifdef MPX_DIG_STANDALONE
+       /* static: reused (and overwritten) by every call */
+       static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
+       off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
+       if (seek_ret != table_vaddr)
+               mpx_dig_abort();
+
+       int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
+       if (read_ret != sizeof(bt_buf))
+               mpx_dig_abort();
+       return &bt_buf;
+#else
+       return (void *)table_vaddr;
+#endif
+}
+
+/*
+ * dump_table() - walk one bounds table and validate its populated entries.
+ *
+ * @table_vaddr:           address of the bounds table
+ * @base_controlled_vaddr: first virtual address covered by this table
+ * @bde_vaddr:             address of the directory entry that points here
+ *                         (only used in diagnostics)
+ *
+ * Every entry with at least one non-zero word is counted, and the address
+ * it covers must fall inside a range known to vaddr_mapped_by_range();
+ * otherwise details are printed and the program aborts.  When DEBUG_LEVEL
+ * is 4 or more each populated entry is also printed.
+ *
+ * Returns the number of populated entries.
+ */
+int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
+               unsigned long bde_vaddr)
+{
+       unsigned long offset_inside_bt;
+       int nr_entries = 0;
+       char *bt_buf;
+
+       dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
+                       __func__, base_controlled_vaddr, bde_vaddr);
+
+       bt_buf = read_bounds_table_into_buf(table_vaddr);
+       /* validate once, before the buffer is indexed */
+       if (!bt_buf) {
+               printf("null bt_buf\n");
+               mpx_dig_abort();
+       }
+
+       dprintf4("%s() read done\n", __func__);
+
+       for (offset_inside_bt = 0;
+            offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
+            offset_inside_bt += bt_entry_size_bytes) {
+               unsigned long bt_entry_index;
+               unsigned long bt_entry_controls;
+               unsigned long this_bt_entry_for_vaddr;
+               unsigned long *bt_entry_buf;
+               unsigned long i;
+
+               dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
+                       offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
+               bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
+               dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
+                               bt_entry_buf);
+               /* an all-zero entry is unpopulated */
+               if (!bt_entry_buf[0] &&
+                   !bt_entry_buf[1] &&
+                   !bt_entry_buf[2] &&
+                   !bt_entry_buf[3])
+                       continue;
+
+               nr_entries++;
+
+               bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
+               /* each entry covers one pointer-sized slot of address space */
+               bt_entry_controls = sizeof(void *);
+               this_bt_entry_for_vaddr =
+                       base_controlled_vaddr + bt_entry_index*bt_entry_controls;
+               /*
+                * We sign extend vaddr bits 48->63 which effectively
+                * creates a hole in the virtual address space.
+                * This calculation corrects for the hole.
+                */
+               if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
+                       this_bt_entry_for_vaddr |= 0xffff800000000000;
+
+               if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
+                       printf("bt_entry_buf: %p\n", bt_entry_buf);
+                       printf("there is a bte for %lx but no mapping\n",
+                                       this_bt_entry_for_vaddr);
+                       printf("          bde   vaddr: %016lx\n", bde_vaddr);
+                       printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
+                       printf("          table_vaddr: %016lx\n", table_vaddr);
+                       printf("          entry vaddr: %016lx @ offset %lx\n",
+                               table_vaddr + offset_inside_bt, offset_inside_bt);
+                       mpx_dig_abort();
+               }
+               if (DEBUG_LEVEL < 4)
+                       continue;
+
+               printf("table entry[%lx]: ", offset_inside_bt);
+               /*
+                * Index by word, not by byte offset: an entry is only
+                * bt_entry_size_bytes / sizeof(unsigned long) words long.
+                */
+               for (i = 0; i < bt_entry_size_bytes / sizeof(unsigned long); i++)
+                       printf("0x%016lx ", bt_entry_buf[i]);
+               printf("\n");
+       }
+       dprintf4("%s() done\n",  __func__);
+       return nr_entries;
+}
+
+/*
+ * search_bd_buf() - scan a chunk of the bounds directory.
+ *
+ * @buf:               the directory chunk
+ * @len_bytes:         size of the chunk
+ * @bd_offset_bytes:   byte offset of the chunk inside the whole directory
+ * @nr_populated_bdes: incremented once per non-zero directory entry
+ *
+ * For each non-zero entry the enable bit is masked off, the first and last
+ * byte of the table it points to must be covered by a known mapping
+ * (otherwise abort), and the table is handed to dump_table().
+ *
+ * Returns the sum of the dump_table() results for this chunk.
+ */
+int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
+               int *nr_populated_bdes)
+{
+       unsigned long off;
+       int chunk_entries = 0;
+
+       dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
+                       len_bytes, bd_offset_bytes, buf + len_bytes);
+
+       for (off = 0; off < len_bytes; off += sizeof(unsigned long)) {
+               unsigned long bd_index = (bd_offset_bytes + off) / sizeof(unsigned long);
+               unsigned long *bde_ptr = (unsigned long *)&buf[off];
+               unsigned long bde = *bde_ptr;
+               unsigned long controlled_base;
+               unsigned long table_first;
+               unsigned long table_last;
+               int table_entries;
+
+               dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, off,
+                               bde_ptr);
+
+               if (bde == 0) {
+                       dprintf4("no bounds dir at index 0x%lx / 0x%lx "
+                                "start at offset:%lx %lx\n", bd_index, bd_index,
+                                       bd_offset_bytes, off);
+                       continue;
+               }
+               dprintf3("found bounds_dir_entry: 0x%lx @ "
+                        "index 0x%lx buf ptr: %p\n", bde, off,
+                                       &buf[off]);
+               /* strip the enable bit to get the table address */
+               bde &= ~0x1;
+               *nr_populated_bdes += 1;
+               dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
+               dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
+
+               table_first = bde;
+               table_last = bde + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
+               if (!vaddr_mapped_by_range(table_first)) {
+                       printf("bounds directory 0x%lx points to nowhere\n",
+                                       bde);
+                       mpx_dig_abort();
+               }
+               if (!vaddr_mapped_by_range(table_last)) {
+                       printf("bounds directory end 0x%lx points to nowhere\n",
+                                       table_last);
+                       mpx_dig_abort();
+               }
+               /*
+                * A directory entry governs 1MB of virtual address space;
+                * this is where that 1MB window starts in the process.
+                */
+               controlled_base = bd_index * (1UL<<20);
+
+               table_entries = dump_table(bde, controlled_base,
+                               bounds_dir_global+bd_offset_bytes+off);
+               chunk_entries += table_entries;
+               dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
+                        "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
+                               bd_index, buf+off,
+                               bde, table_entries, chunk_entries,
+                               controlled_base, controlled_base + (1UL<<20));
+       }
+       dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
+                       bd_offset_bytes);
+       return chunk_entries;
+}
+
+/*
+ * Descriptor for /proc/<pid>/mem, read by fill_bounds_dir_buf_other().
+ * Stays -1 until main() (MPX_DIG_REMOTE builds) opens it.
+ */
+int proc_pid_mem_fd = -1;
+
+/*
+ * fill_bounds_dir_buf_other() - copy part of another process's bounds
+ * directory into @buffer by reading proc_pid_mem_fd.
+ *
+ * Seeks to bounds_dir_global + @byte_offset_inside_bounds_dir and reads
+ * @buffer_size_bytes.  A seek that lands elsewhere or a short read aborts.
+ * Returns @buffer.
+ */
+void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
+                          long buffer_size_bytes, void *buffer)
+{
+       unsigned long target = bounds_dir_global + byte_offset_inside_bounds_dir;
+       off_t landed;
+       int nread;
+
+       landed = lseek(proc_pid_mem_fd, target, SEEK_SET);
+       if (landed != target)
+               mpx_dig_abort();
+
+       /* there shouldn't practically be short reads of /proc/$pid/mem */
+       nread = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
+       if (nread != buffer_size_bytes)
+               mpx_dig_abort();
+
+       return buffer;
+}
+/*
+ * fill_bounds_dir_buf_self() - locate part of our own bounds directory.
+ *
+ * Nothing is copied: the directory lives in this address space, so the
+ * return value points straight into it (@buffer is unused).  mincore() is
+ * used to check whether any page of the chunk is resident; if none is,
+ * NULL is returned so the caller can skip the chunk.  A mincore() failure
+ * aborts.
+ */
+void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
+                          long buffer_size_bytes, void *buffer)
+
+{
+       /* mincore() writes one byte per page and rounds the length up */
+       unsigned long nr_pages = (buffer_size_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+       /* a zero-length VLA is undefined, so keep at least one element */
+       unsigned char vec[nr_pages ? nr_pages : 1];
+       char *dig_bounds_dir_ptr =
+               (void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
+       /*
+        * use mincore() to quickly find the areas of the bounds directory
+        * that have memory and thus will be worth scanning.
+        */
+       int incore_ret;
+
+       int incore = 0;
+       unsigned long i;
+
+       (void)buffer;
+
+       dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
+
+       incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
+       if (incore_ret) {
+               printf("mincore ret: %d\n", incore_ret);
+               perror("mincore");
+               mpx_dig_abort();
+       }
+       /* only bit 0 of each byte is defined as the residency flag */
+       for (i = 0; i < nr_pages; i++)
+               incore += vec[i] & 1;
+       dprintf4("%s() total incore: %d\n", __func__, incore);
+       if (!incore)
+               return NULL;
+       dprintf3("%s() total incore: %d\n", __func__, incore);
+       return dig_bounds_dir_ptr;
+}
+
+/*
+ * inspect_pid() - walk the bounds directory of @pid in fixed-size chunks.
+ *
+ * Only self-inspection is supported here: any @pid other than getpid()
+ * aborts.  Each chunk is obtained through fill_bounds_dir_buf_self() (or
+ * the _other variant) and, unless that returned NULL, handed to
+ * search_bd_buf().  A summary line is printed at the end.
+ *
+ * Returns the number of table entries plus the number of populated
+ * directory entries.
+ *
+ * NOTE(review): the loop is bounded by MPX_BOUNDS_TABLE_SIZE_BYTES even
+ * though it iterates over the *directory* -- confirm that this is the
+ * intended limit.
+ */
+int inspect_pid(int pid)
+{
+       static int dig_nr;
+       long dir_offset;
+       char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
+       char *chunk;
+       int total_entries = 0;
+       int nr_populated_bdes = 0;
+       int inspect_self;
+
+       if (getpid() != pid) {
+               dprintf4("inspecting pid %d\n", pid);
+               mpx_dig_abort();
+       } else {
+               dprintf4("inspecting self\n");
+               inspect_self = 1;
+       }
+
+       for (dir_offset = 0;
+            dir_offset < MPX_BOUNDS_TABLE_SIZE_BYTES;
+            dir_offset += sizeof(bounds_dir_buf)) {
+               static int bufs_skipped;
+
+               if (inspect_self)
+                       chunk = fill_bounds_dir_buf_self(dir_offset,
+                                                        sizeof(bounds_dir_buf),
+                                                        &bounds_dir_buf[0]);
+               else
+                       chunk = fill_bounds_dir_buf_other(dir_offset,
+                                                         sizeof(bounds_dir_buf),
+                                                         &bounds_dir_buf[0]);
+
+               if (chunk)
+                       total_entries += search_bd_buf(chunk,
+                                               sizeof(bounds_dir_buf),
+                                               dir_offset,
+                                               &nr_populated_bdes);
+               else
+                       bufs_skipped++;
+       }
+       printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
+                       total_entries, nr_populated_bdes);
+       return total_entries + nr_populated_bdes;
+}
+
+#ifdef MPX_DIG_REMOTE
+/*
+ * Stand-alone entry point: mpx-dig <pid> <bounds-dir-address-in-hex>
+ *
+ * Parses both arguments, opens /proc/<pid>/mem and runs inspect_pid().
+ * Returns 1 when arguments are missing; parse or open failures abort.
+ *
+ * NOTE(review): inspect_pid() as written aborts unless <pid> is the calling
+ * process itself.
+ */
+int main(int argc, char **argv)
+{
+       int err;
+       int pid;
+
+       printf("mpx-dig starting...\n");
+       /* both arguments are dereferenced below, so insist on them */
+       if (argc < 3) {
+               fprintf(stderr, "usage: %s <pid> <bounds-dir-hex-addr>\n",
+                       argc > 0 ? argv[0] : "mpx-dig");
+               return 1;
+       }
+
+       err = sscanf(argv[1], "%d", &pid);
+       printf("parsing: '%s', err: %d\n", argv[1], err);
+       if (err != 1)
+               mpx_dig_abort();
+
+       err = sscanf(argv[2], "%lx", &bounds_dir_global);
+       printf("parsing: '%s': %d\n", argv[2], err);
+       if (err != 1)
+               mpx_dig_abort();
+
+       proc_pid_mem_fd = open_proc(pid, "mem");
+       if (proc_pid_mem_fd < 0)
+               mpx_dig_abort();
+
+       inspect_pid(pid);
+       return 0;
+}
+#endif
+
+/*
+ * inspect_me() - inspect the calling process's own bounds directory.
+ *
+ * Reloads the mapping list, records @bounds_dir as the directory base in
+ * bounds_dir_global and returns whatever inspect_pid() reports.
+ */
+long inspect_me(struct mpx_bounds_dir *bounds_dir)
+{
+       int self = getpid();
+
+       pid_load_vaddrs(self);
+       bounds_dir_global = (unsigned long)bounds_dir;
+       dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
+
+       return inspect_pid(self);
+}
 
--- /dev/null
+/*
+ * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtensions)
+ *
+ * Written by:
+ * "Ren, Qiaowei" <qiaowei.ren@intel.com>
+ * "Wei, Gang" <gang.wei@intel.com>
+ * "Hansen, Dave" <dave.hansen@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2.
+ */
+
+/*
+ * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
+ *            it works on 32-bit.
+ */
+
+/*
+ * Test-cadence tunables.  NOTE(review): their users are not visible in this
+ * part of the file; judging by the names they set how often (counted in
+ * mallocs) the tables are inspected and zapped -- confirm against callers.
+ */
+int inspect_every_this_many_mallocs = 100;
+int zap_all_every_this_many_mallocs = 1000;
+
+/* Feature-test macros; they must stay ahead of every #include below. */
+#define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "mpx-hw.h"
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+
+/*
+ * Fallback for toolchains whose headers do not already provide
+ * __always_inline.  The attribute takes double parentheses, so both pairs
+ * must be closed.
+ */
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+/*
+ * Fallback used when the build does not define TEST_DURATION_SECS
+ * (presumably a duration in seconds, going by the name -- its user is not
+ * visible here).
+ */
+#ifndef TEST_DURATION_SECS
+#define TEST_DURATION_SECS 3
+#endif
+
+/*
+ * write_int_to() - write "<prefix><int_to_write>" to an existing file.
+ *
+ * @file is opened O_RDWR (it is not created), the formatted text is
+ * written with a single write() and the file is closed.  Every step is
+ * checked with assert().
+ */
+void write_int_to(char *prefix, char *file, int int_to_write)
+{
+       char text[100];
+       int text_len;
+       int rc;
+       int fd;
+
+       fd = open(file, O_RDWR);
+       assert(fd >= 0);
+
+       text_len = snprintf(text, sizeof(text), "%s%d", prefix, int_to_write);
+       assert(text_len >= 0);
+       assert(text_len < sizeof(text));
+
+       rc = write(fd, text, text_len);
+       assert(rc == text_len);
+
+       rc = close(fd);
+       assert(!rc);
+}
+
+/* Write "<prefix><our pid>" to @file; a thin wrapper around write_int_to(). */
+void write_pid_to(char *prefix, char *file)
+{
+       write_int_to(prefix, file, getpid());
+}
+
+/*
+ * trace_me() - point ftrace at this process.
+ *
+ * Writes our pid to set_ftrace_pid and then writes 0 to the trace file,
+ * both under /sys/kernel/debug/tracing.
+ */
+void trace_me(void)
+{
+/* tracing events dir */
+#define TED "/sys/kernel/debug/tracing/events/"
+       static char ftrace_pid_file[] = "/sys/kernel/debug/tracing/set_ftrace_pid";
+       static char trace_file[] = "/sys/kernel/debug/tracing/trace";
+       static char no_prefix[] = "";
+
+       /*
+        * Per-event filtering, currently switched off:
+        *
+        *      write_pid_to("common_pid=", TED "signal/filter");
+        *      write_pid_to("common_pid=", TED "exceptions/filter");
+        *      write_int_to("", TED "signal/enable", 1);
+        *      write_int_to("", TED "exceptions/enable", 1);
+        */
+       write_pid_to(no_prefix, ftrace_pid_file);
+       write_int_to(no_prefix, trace_file, 0);
+}
+
+/*
+ * test_failed() - print the file and line of the call site to stderr and
+ * terminate the process with abort().
+ */
+#define test_failed() __test_failed(__FILE__, __LINE__)
+static void __test_failed(char *f, int l)
+{
+       fprintf(stderr, "abort @ %s::%d\n", f, l);
+       abort();
+}
+
+/* Error printf: same arguments as printf(), output goes to stderr. */
+#define eprintf(args...)       fprintf(stderr, args)
+
+/*
+ * Per-architecture glue.  Each branch provides:
+ *   REG_IP_IDX            - gregs[] index of the instruction pointer
+ *   REX_PREFIX            - text spliced into the ".byte" strings of the
+ *                           hand-encoded XSAVE/XRSTOR helpers (empty on i386)
+ *   XSAVE_OFFSET_IN_FPMEM - byte offset added to uc_mcontext.fpregs by
+ *                           read_mpx_status_sig()
+ *   __cpuid()             - executes CPUID; *eax and *ecx are inputs, all
+ *                           four pointers receive the results
+ */
+#ifdef __i386__
+
+/* i386 directory size is 4MB */
+#define REG_IP_IDX     REG_EIP
+#define REX_PREFIX
+
+#define XSAVE_OFFSET_IN_FPMEM  sizeof(struct _libc_fpstate)
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+               unsigned int *ecx, unsigned int *edx)
+{
+       /* ecx is often an input as well as an output. */
+       /*
+        * %ebx is saved and restored by hand around CPUID and its value is
+        * moved out through operand %1 instead of an "=b" constraint.
+        */
+       asm volatile(
+               "push %%ebx;"
+               "cpuid;"
+               "mov %%ebx, %1;"
+               "pop %%ebx"
+               : "=a" (*eax),
+                 "=g" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (*eax), "2" (*ecx));
+}
+
+#else /* __i386__ */
+
+#define REG_IP_IDX     REG_RIP
+#define REX_PREFIX "0x48, "
+
+#define XSAVE_OFFSET_IN_FPMEM  0
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+               unsigned int *ecx, unsigned int *edx)
+{
+       /* ecx is often an input as well as an output. */
+       asm volatile(
+               "cpuid;"
+               : "=a" (*eax),
+                 "=b" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (*eax), "2" (*ecx));
+}
+
+#endif /* !__i386__ */
+
+/* XSAVE header: 8 + 16 + 40 = 64 bytes (packed). */
+struct xsave_hdr_struct {
+       uint64_t xstate_bv;
+       uint64_t reserved1[2];
+       uint64_t reserved2[5];
+} __attribute__((packed));
+
+/* Storage for the bounds registers: eight 64-bit words. */
+struct bndregs_struct {
+       uint64_t bndregs[8];
+} __attribute__((packed));
+
+/* Bounds config/status pair; status_reg is what the signal handler reads. */
+struct bndcsr_struct {
+       uint64_t cfg_reg_u;
+       uint64_t status_reg;
+} __attribute__((packed));
+
+/*
+ * Layout used to interpret an XSAVE image.  Because everything is packed,
+ * the member sizes put bndregs at byte offset 960 (512 + 64 + 256 + 128)
+ * and bndcsr at byte offset 1024.
+ */
+struct xsave_struct {
+       uint8_t fpu_sse[512];
+       struct xsave_hdr_struct xsave_hdr;
+       uint8_t ymm[256];
+       uint8_t lwp[128];
+       struct bndregs_struct bndregs;
+       struct bndcsr_struct bndcsr;
+} __attribute__((packed));
+
+/* 64-byte-aligned scratch area; read_mpx_status_sig() copies the XSAVE image here. */
+uint8_t __attribute__((__aligned__(64))) buffer[4096];
+/* 'buffer' viewed as an XSAVE image. */
+struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
+
+/* Second 64-byte-aligned scratch area and its XSAVE-image view. */
+uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
+struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
+
+/* Incremented by handler() for each bounds exception it processes. */
+uint64_t num_bnd_chk;
+
+/*
+ * xrstor_state() - restore the state components selected by @mask from @fx.
+ *
+ * The instruction is emitted as raw bytes (0x0f 0xae 0x2f, with REX_PREFIX
+ * in front where that macro is non-empty).  @fx is passed in the "D"
+ * register and the mask is split into its low and high halves for the "a"
+ * and "d" registers.
+ */
+static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
+{
+       uint32_t lmask = mask;
+       uint32_t hmask = mask >> 32;
+
+       asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+                    : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+                    :   "memory");
+}
+
+/*
+ * xsave_state_1() - save the state components selected by @mask into @_fx.
+ *
+ * Emitted as raw bytes (0x0f 0xae 0x27, with REX_PREFIX in front where that
+ * macro is non-empty).  The buffer address goes in the "D" register and the
+ * mask is split into low/high halves for the "a" and "d" registers.
+ */
+static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
+{
+       uint32_t lmask = mask;
+       uint32_t hmask = mask >> 32;
+       unsigned char *fx = _fx;
+
+       asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+                    : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+                    :   "memory");
+}
+
+/*
+ * xgetbv() - execute XGETBV (hand-encoded) with @index in the "c" register
+ * and return the 64-bit result assembled from the "d" (high 32 bits) and
+ * "a" (low 32 bits) outputs.
+ */
+static inline uint64_t xgetbv(uint32_t index)
+{
+       uint32_t eax, edx;
+
+       asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+                    : "=a" (eax), "=d" (edx)
+                    : "c" (index));
+       return eax + ((uint64_t)edx << 32);
+}
+
+/*
+ * read_mpx_status_sig() - fetch the bounds status register from a signal
+ * context.
+ *
+ * Clears the global 'buffer', copies sizeof(struct xsave_struct) bytes from
+ * the context's fpregs area (offset by XSAVE_OFFSET_IN_FPMEM) into it and
+ * returns bndcsr.status_reg from that copy.
+ */
+static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
+{
+       uint8_t *sig_xsave =
+               (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM;
+
+       memset(buffer, 0, sizeof(buffer));
+       memcpy(buffer, sig_xsave, sizeof(struct xsave_struct));
+
+       return xsave_buf->bndcsr.status_reg;
+}
+
+#include <pthread.h>
+
+/*
+ * get_next_inst_ip() - step over one 0x0f-escaped instruction.
+ *
+ * Accepts at most one of the 0xf2/0xf3/0x66 prefixes, then at most one byte
+ * with bit 6 set (treated as a REX prefix), then requires 0x0f followed by
+ * an opcode byte and a ModRM byte.  The SIB byte and displacement implied
+ * by ModRM are skipped as well.
+ *
+ * Returns the address just past the instruction, or @addr unchanged when
+ * the 0x0f escape byte is not found where expected.
+ */
+static uint8_t *get_next_inst_ip(uint8_t *addr)
+{
+       uint8_t *cursor = addr;
+       uint8_t modrm;
+       uint8_t mod;
+       uint8_t rm;
+       int disp32_without_base;
+
+       /* one optional legacy prefix */
+       if (*cursor == 0xf2 || *cursor == 0xf3 || *cursor == 0x66)
+               cursor++;
+
+       /* look for rex prefix */
+       if (*cursor & 0x40)
+               cursor++;
+
+       /* Make sure we have a MPX instruction. */
+       if (*cursor++ != 0x0f)
+               return addr;
+
+       /* opcode byte, then ModRM */
+       cursor++;
+       modrm = *cursor++;
+       mod = modrm >> 6;
+       rm = modrm & 7;
+
+       /* register-direct form: nothing follows ModRM */
+       if (mod == 3)
+               return cursor;
+
+       if (rm == 4) {
+               /* SIB byte present; base field 5 means "disp32, no base" */
+               uint8_t sib_base = *cursor++ & 7;
+
+               disp32_without_base = (sib_base == 5);
+       } else {
+               /* plain ModRM; rm field 5 means "disp32, no base" */
+               disp32_without_base = (rm == 5);
+       }
+
+       if (mod == 0) {
+               if (disp32_without_base)
+                       cursor += 4;
+       } else if (mod == 1) {
+               cursor += 1;    /* disp8 */
+       } else {
+               cursor += 4;    /* disp32 */
+       }
+       return cursor;
+}
+
+/*
+ * Accessors for the lower/upper bounds reported in a siginfo.
+ *
+ * When the C library exposes si_lower/si_upper they are used directly.
+ * Otherwise the two pointers are assumed to sit back to back immediately
+ * after the _sigfault member, and are fetched from there.
+ */
+#ifdef si_lower
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+       return si->si_lower;
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+       return si->si_upper;
+}
+#else
+/* Address of the slot holding the lower bound: first byte after _sigfault. */
+static inline void **__si_bounds_hack(siginfo_t *si)
+{
+       void *sigfault = &si->_sifields._sigfault;
+       void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
+       void **__si_lower = end_sigfault;
+
+       return __si_lower;
+}
+
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+       return *__si_bounds_hack(si);
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+       /*
+        * The upper bound is the pointer stored in the *next* slot; it is
+        * not the lower bound's value plus sizeof(void *).
+        */
+       return *(__si_bounds_hack(si) + 1);
+}
+#endif
+
+/* Number of trap-5 (#BR) signals seen by handler(). */
+static int br_count;
+/*
+ * Bounds register the next exception is expected to involve; -1 until set.
+ * check_siginfo_vs_shadow() uses it to index shadow_plb[].
+ */
+static int expected_bnd_index = -1;
+uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
+/* NOTE(review): per-register bookkeeping; its use is not visible in this part of the file. */
+unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
+
+/*
+ * The kernel is supposed to provide some information about the bounds
+ * exception in the siginfo.  It should match what we have in the bounds
+ * registers that we are checking against.  Just check against the shadow copy
+ * since it is easily available, and we also check that *it* matches the real
+ * registers.
+ *
+ * Exits with status 6 when expected_bnd_index is out of range and with
+ * status 7 when either bound in @si differs from the shadow copy.
+ */
+void check_siginfo_vs_shadow(siginfo_t* si)
+{
+       int siginfo_ok = 1;
+       void *shadow_lower;
+       void *shadow_upper;
+
+       if ((expected_bnd_index < 0) ||
+           (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
+               fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
+                       expected_bnd_index);
+               exit(6);
+       }
+       /*
+        * Only index the shadow array once the index is known to be valid;
+        * expected_bnd_index starts out as -1.
+        */
+       shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
+       shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
+
+       if (__si_bounds_lower(si) != shadow_lower)
+               siginfo_ok = 0;
+       if (__si_bounds_upper(si) != shadow_upper)
+               siginfo_ok = 0;
+
+       if (!siginfo_ok) {
+               fprintf(stderr, "ERROR: siginfo bounds do not match "
+                       "shadow bounds for register %d\n", expected_bnd_index);
+               exit(7);
+       }
+}
+
+/*
+ * handler() - signal handler for the bounds-exception tests.
+ *
+ * @signum:    signal number, only printed
+ * @si:        siginfo for the fault
+ * @vucontext: the ucontext_t of the interrupted code
+ *
+ * Trap 5 (#BR): the low two bits of the bounds status register select the
+ * action.  Reason 1 is the expected case: it is counted in num_bnd_chk and
+ * the saved instruction pointer is moved past the faulting instruction so
+ * execution resumes after it.  Every other reason prints a message and
+ * exits (status 5, 4 or 3).
+ * Any other trap number is reported and ends in test_failed().
+ */
+void handler(int signum, siginfo_t *si, void *vucontext)
+{
+       int i;
+       ucontext_t *uctxt = vucontext;
+       int trapno;
+       unsigned long ip;
+
+       dprintf1("entered signal handler\n");
+
+       trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+       ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+
+       if (trapno == 5) {
+               typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
+               uint64_t status = read_mpx_status_sig(uctxt);
+               /* low two bits of the status register give the reason */
+               uint64_t br_reason =  status & 0x3;
+
+               br_count++;
+               dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
+
+#define __SI_FAULT      (3 << 16)
+#define SEGV_BNDERR     (__SI_FAULT|3)  /* failed address bound checks */
+
+               dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
+                               status, ip, br_reason);
+               dprintf2("si_signo: %d\n", si->si_signo);
+               dprintf2("  signum: %d\n", signum);
+               dprintf2("info->si_code == SEGV_BNDERR: %d\n",
+                               (si->si_code == SEGV_BNDERR));
+               dprintf2("info->si_code: %d\n", si->si_code);
+               dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
+               dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
+
+               check_siginfo_vs_shadow(si);
+
+               /* debug dump of eight pointer-sized words starting at si_addr */
+               for (i = 0; i < 8; i++)
+                       dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
+               switch (br_reason) {
+               case 0: /* traditional BR */
+                       fprintf(stderr,
+                               "Undefined status with bound exception:%jx\n",
+                                status);
+                       exit(5);
+               case 1: /* #BR MPX bounds exception */
+                       /* these are normal and we expect to see them */
+                       dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
+                               status, (void *)ip, si->si_addr);
+                       num_bnd_chk++;
+                       /* resume after the instruction that faulted */
+                       uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+                               (greg_t)get_next_inst_ip((uint8_t *)ip);
+                       break;
+               case 2:
+                       fprintf(stderr, "#BR status == 2, missing bounds table,"
+                                       "kernel should have handled!!\n");
+                       exit(4);
+                       break;
+               default:
+                       fprintf(stderr, "bound check error: status 0x%jx at %p\n",
+                               status, (void *)ip);
+                       num_bnd_chk++;
+                       uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+                               (greg_t)get_next_inst_ip((uint8_t *)ip);
+                       fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
+                       exit(3);
+               }
+       } else if (trapno == 14) {
+               /* trap 14 is reported as a page fault */
+               eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
+                       trapno, ip);
+               eprintf("si_addr %p\n", si->si_addr);
+               eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+               test_failed();
+       } else {
+               eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
+               eprintf("si_addr %p\n", si->si_addr);
+               eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+               test_failed();
+       }
+}
+
+/*
+ * cpuid_count() - run CPUID for leaf @op, sub-leaf @count.
+ *
+ * The inputs are stored through @eax and @ecx, which __cpuid() reads; all
+ * four pointers receive the result registers.
+ */
+static inline void cpuid_count(unsigned int op, int count,
+                              unsigned int *eax, unsigned int *ebx,
+                              unsigned int *ecx, unsigned int *edx)
+{
+       *eax = op;
+       *ecx = count;
+       __cpuid(eax, ebx, ecx, edx);
+}
+
+/* CPUID leaf passed to cpuid_count() by print_state_component(). */
+#define XSTATE_CPUID       0x0000000d
+
+/*
+ * List of XSAVE features Linux knows about:
+ * (the enumerators are bit numbers, counting up from 0)
+ */
+enum xfeature_bit {
+       XSTATE_BIT_FP,
+       XSTATE_BIT_SSE,
+       XSTATE_BIT_YMM,
+       XSTATE_BIT_BNDREGS,
+       XSTATE_BIT_BNDCSR,
+       XSTATE_BIT_OPMASK,
+       XSTATE_BIT_ZMM_Hi256,
+       XSTATE_BIT_Hi16_ZMM,
+
+       XFEATURES_NR_MAX,
+};
+
+/* Single-bit masks built from the bit numbers above. */
+#define XSTATE_FP             (1 << XSTATE_BIT_FP)
+#define XSTATE_SSE           (1 << XSTATE_BIT_SSE)
+#define XSTATE_YMM           (1 << XSTATE_BIT_YMM)
+#define XSTATE_BNDREGS   (1 << XSTATE_BIT_BNDREGS)
+#define XSTATE_BNDCSR     (1 << XSTATE_BIT_BNDCSR)
+#define XSTATE_OPMASK     (1 << XSTATE_BIT_OPMASK)
+#define XSTATE_ZMM_Hi256       (1 << XSTATE_BIT_ZMM_Hi256)
+#define XSTATE_Hi16_ZMM         (1 << XSTATE_BIT_Hi16_ZMM)
+
+/* The two MPX state components: bits 3 and 4. */
+#define MPX_XSTATES            (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
+
+/*
+ * Return true when bit number 'bit' (0..31) is set in 'x'.
+ *
+ * The mask is built from an unsigned constant so that asking about
+ * bit 31 does not shift a 1 into the sign bit of a signed int, which
+ * would be undefined behavior.
+ */
+bool one_bit(unsigned int x, int bit)
+{
+       return !!(x & (1U << bit));
+}
+
+/*
+ * Print what CPUID reports about one XSAVE state component.
+ *
+ * state_bit_nr: sub-leaf of the XSTATE CPUID leaf to query; callers in
+ *               this file pass an enum xfeature_bit value
+ * name:         label printed in front of the report
+ *
+ * Trips an assert() if CPUID hands back zero in eax, ebx and ecx.
+ */
+void print_state_component(int state_bit_nr, char *name)
+{
+       unsigned int eax, ebx, ecx, edx;
+       unsigned int state_component_size;
+       unsigned int state_component_supervisor;
+       unsigned int state_component_user;
+       unsigned int state_component_aligned;
+
+       /* See SDM Section 13.2 */
+       cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
+       assert(eax || ebx || ecx);
+       state_component_size = eax;
+       /* ecx bit 0 separates supervisor from user components */
+       state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
+       state_component_user = !one_bit(ecx, 0);
+       state_component_aligned = one_bit(ecx, 1);
+       /* All four values are unsigned int, so print them with %u */
+       printf("%8s: size: %u user: %u supervisor: %u aligned: %u\n",
+               name,
+               state_component_size,       state_component_user,
+               state_component_supervisor, state_component_aligned);
+
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
+#define XSAVE_FEATURE_BIT       (26)  /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define OSXSAVE_FEATURE_BIT     (27) /* XSAVE enabled in the OS */
+
+/*
+ * Probe the CPU and OS for everything the MPX tests depend on.
+ *
+ * Returns true when XSAVE, OSXSAVE, the XSTATE CPUID leaf and both MPX
+ * state components are present and enabled.  Any missing piece prints
+ * a message to stderr and ends the process with exit(0), so a false
+ * return is never actually produced.
+ */
+bool check_mpx_support(void)
+{
+       unsigned int eax, ebx, ecx, edx;
+
+       cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
+
+       /* We can't do much without XSAVE, so bail out early without it */
+       if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
+               fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
+               exit(0);
+       }
+
+       if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
+               fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
+               exit(0);
+       }
+
+       /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
+       /* Is this redundant with the feature bit checks? */
+       cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
+       if (eax < XSTATE_CPUID) {
+               fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
+                               " can not run MPX tests\n");
+               exit(0);
+       }
+
+       printf("XSAVE is supported by HW & OS\n");
+
+       cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+
+       printf("XSAVE processor supported state mask: 0x%x\n", eax);
+       printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
+
+       /* Make sure the MPX states are supported by XSAVE* on this CPU */
+       if ((eax & MPX_XSTATES) != MPX_XSTATES) {
+               fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
+               exit(0);
+       }
+
+       /* Make sure that the MPX states are enabled in XCR0 */
+       if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
+               fprintf(stderr, "MPX XSTATE(s) not enabled in XCR0, "
+                               "can not run MPX tests\n");
+               exit(0);
+       }
+
+       print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
+       print_state_component(XSTATE_BIT_BNDCSR,  "BNDCSR");
+
+       return true;
+}
+
+/*
+ * Load a bounds configuration that points at 'l1base' into the CPU.
+ *
+ * The sequence is: restore the MPX components from a zeroed buffer,
+ * fill in the BNDCSR image, restore again so the new configuration
+ * takes effect, then save the state back so it can be logged.
+ *
+ * NOTE(review): 'buffer' and 'xsave_buf' are defined elsewhere in this
+ * file; xsave_buf presumably points into buffer - confirm.
+ */
+void enable_mpx(void *l1base)
+{
+       /* enable point lookup */
+       memset(buffer, 0, sizeof(buffer));
+       xrstor_state(xsave_buf, MPX_XSTATES);
+
+       /* Mark only BNDCSR as carrying data for the next restore */
+       xsave_buf->xsave_hdr.xstate_bv = XSTATE_BNDCSR;
+       /* Directory address with bit 0 set (the enable bit per the SDM) */
+       xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
+       xsave_buf->bndcsr.status_reg = 0;
+
+       dprintf2("bf xrstor\n");
+       dprintf2("xsave cndcsr: status %jx, configu %jx\n",
+              xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+       xrstor_state(xsave_buf, MPX_XSTATES);
+       dprintf2("after xrstor\n");
+
+       /* Read the state back so the values logged below come from the CPU */
+       xsave_state_1(xsave_buf, MPX_XSTATES);
+
+       dprintf1("xsave bndcsr: status %jx, configu %jx\n",
+              xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+}
+
+#include <sys/prctl.h>
+
+/* Bounds directory in use; assigned by process_specific_init() */
+struct mpx_bounds_dir *bounds_dir_ptr;
+
+/*
+ * Hand back whatever nr_incore() reports for the
+ * MPX_BOUNDS_DIR_SIZE_BYTES bytes starting at bounds_dir_ptr.
+ *
+ * 'func' and 'line' are supplied by the bd_incore() wrapper but are
+ * not looked at here.
+ */
+unsigned long __bd_incore(const char *func, int line)
+{
+       return nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
+}
+#define bd_incore() __bd_incore(__func__, __LINE__)
+
+/*
+ * Verify that the 'sz' bytes starting at 'ptr' are all zero, looking
+ * at one unsigned long at a time.  The first non-zero word found is
+ * logged and trips an assert().
+ */
+void check_clear(void *ptr, unsigned long sz)
+{
+       unsigned long *word = ptr;
+
+       while ((void *)word < ptr + sz) {
+               if (*word) {
+                       dprintf1("%p is NOT clear at %p\n", ptr, word);
+                       assert(0);
+               }
+               word++;
+       }
+       dprintf1("%p is clear for %lx\n", ptr, sz);
+}
+
+/*
+ * Verify that the whole bounds directory is zero.
+ *
+ * The length follows the same rule process_specific_init() uses when
+ * it allocates the directory: 2GB, or 4MB when unsigned long is four
+ * bytes.  A fixed 2GB would walk far past the end of the smaller
+ * directory.
+ */
+void check_clear_bd(void)
+{
+       unsigned long size = 2UL << 30; /* 2GB */
+
+       if (sizeof(unsigned long) == 4)
+               size = 4UL << 20; /* 4MB */
+       check_clear(bounds_dir_ptr, size);
+}
+
+/* Non-zero: get the bounds directory from malloc() rather than mmap() */
+#define USE_MALLOC_FOR_BOUNDS_DIR 1
+/*
+ * Allocate the bounds directory, check that it is zeroed, load it into
+ * the CPU through enable_mpx(), and then issue prctl(43).
+ *
+ * Returns true on success.  A failed mmap() or prctl() aborts the
+ * process; a failed malloc() trips an assert().
+ *
+ * NOTE(review): 43 is presumably PR_MPX_ENABLE_MANAGEMENT - confirm
+ * against <linux/prctl.h>.
+ */
+bool process_specific_init(void)
+{
+       unsigned long size;
+       unsigned long *dir;
+       /* Guarantee we have the space to align it, add padding: */
+       unsigned long pad = getpagesize();
+
+       size = 2UL << 30; /* 2GB */
+       if (sizeof(unsigned long) == 4)
+               size = 4UL << 20; /* 4MB */
+       dprintf1("trying to allocate %lu MB bounds directory\n", (size >> 20));
+
+       if (USE_MALLOC_FOR_BOUNDS_DIR) {
+               unsigned long _dir;
+
+               dir = malloc(size + pad);
+               assert(dir);
+               /* Round the address up to the next 4k boundary */
+               _dir = (unsigned long)dir;
+               _dir += 0xfffUL;
+               _dir &= ~0xfffUL;
+               dir = (void *)_dir;
+       } else {
+               /*
+                * This makes debugging easier because the address
+                * calculations are simpler:
+                */
+               dir = mmap((void *)0x200000000000, size + pad,
+                               PROT_READ|PROT_WRITE,
+                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+               if (dir == MAP_FAILED) {
+                       perror("unable to allocate bounds directory");
+                       abort();
+               }
+               check_clear(dir, size);
+       }
+       bounds_dir_ptr = (void *)dir;
+       madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
+       bd_incore();
+       dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
+                       (char *)bounds_dir_ptr + size);
+       /* The directory must be empty both before and after enabling MPX */
+       check_clear(dir, size);
+       enable_mpx(dir);
+       check_clear(dir, size);
+       if (prctl(43, 0, 0, 0, 0)) {
+               printf("no MPX support\n");
+               abort();
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Issue prctl(44), the counterpart of the prctl(43) call made by
+ * process_specific_init().
+ *
+ * Returns true on success; on failure prints a message and returns
+ * false.
+ *
+ * The unused prctl() arguments are passed as explicit zeros, exactly
+ * as the enable path does, instead of leaving them unspecified.
+ *
+ * NOTE(review): 44 is presumably PR_MPX_DISABLE_MANAGEMENT - confirm
+ * against <linux/prctl.h>.
+ */
+bool process_specific_finish(void)
+{
+       if (prctl(44, 0, 0, 0, 0)) {
+               printf("no MPX support\n");
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Install 'handler' as the SA_SIGINFO-style handler for SIGSEGV,
+ * which is the signal #BR is delivered as.  The signal mask applied
+ * while the handler runs is a copy of the current process mask.
+ */
+void setup_handler()
+{
+       struct sigaction previous;
+       struct sigaction action;
+       int mask_ret, install_ret;
+
+       action.sa_handler = 0;   /* void(*)(int)*/
+       action.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
+
+       /* call sa_sigaction, not sa_handler*/
+       action.sa_flags = SA_SIGINFO;
+       action.sa_restorer = 0;  /* void(*)(), obsolete */
+
+       /* A NULL 'set' only reads the current mask, into sa_mask */
+       mask_ret = sigprocmask(SIG_SETMASK, 0, &action.sa_mask);
+       assert(mask_ret == 0);
+
+       /* #BR is mapped to sigsegv */
+       install_ret = sigaction(SIGSEGV, &action, &previous);
+       assert(install_ret == 0);
+}
+
+/*
+ * One-time test setup: install the signal handler first, then run the
+ * per-process MPX initialization.
+ */
+void mpx_prepare(void)
+{
+       dprintf2("%s()\n", __func__);
+
+       setup_handler();
+       /* The result is not inspected here */
+       (void)process_specific_init();
+}
+
+/*
+ * End-of-test teardown: report the num_bnd_chk counter, then run the
+ * per-process MPX shutdown.
+ */
+void mpx_cleanup(void)
+{
+       printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
+
+       /* The result is not inspected here */
+       (void)process_specific_finish();
+}
+
+/*-------------- the following is test case ---------------*/
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+/* Bounds violations predicted by the shadow model, per check type */
+uint64_t num_lower_brs;
+uint64_t num_upper_brs;
+
+/* Byte offsets into an xsave buffer used when dumping/comparing state */
+#define MPX_CONFIG_OFFSET 1024
+#define MPX_BOUNDS_OFFSET 960
+#define MPX_HEADER_OFFSET 512
+/* Exclusive upper limit for addresses made by get_random_addr() */
+#define MAX_ADDR_TESTED (1<<28)
+/* Number of outer iterations in check_mpx_insns_and_tables() */
+#define TEST_ROUNDS 100
+
+/*
+      0F 1A /r BNDLDX-Load
+      0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
+   66 0F 1A /r BNDMOV bnd1, bnd2/m128
+   66 0F 1B /r BNDMOV bnd1/m128, bnd2
+   F2 0F 1A /r BNDCU bnd, r/m64
+   F2 0F 1B /r BNDCN bnd, r/m64
+   F3 0F 1A /r BNDCL bnd, r/m64
+   F3 0F 1B /r BNDMK bnd, m64
+*/
+
+/*
+ * Save the state components selected by 'mask' into the buffer at
+ * '_fx' using a hand-encoded instruction (0f ae /4 on (%rdi), i.e.
+ * XSAVE).  The low 32 bits of the mask go in eax and the high 32 bits
+ * in edx, as the register constraints show.
+ */
+static __always_inline void xsave_state(void *_fx, uint64_t mask)
+{
+       uint32_t lmask = mask;
+       uint32_t hmask = mask >> 32;
+       unsigned char *fx = _fx;
+
+       asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+                    : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+                    :   "memory");
+}
+
+/*
+ * Run BNDMK on bnd0 with a NULL base in rcx and size-1 == -1 in rdx,
+ * which matches the {0, ~0} "everything allowed" bounds the shadow
+ * model starts from.
+ */
+static __always_inline void mpx_clear_bnd0(void)
+{
+       long size = 0;
+       void *ptr = NULL;
+       /* F3 0F 1B /r BNDMK bnd, m64                   */
+       /* f3 0f 1b 04 11    bndmk  (%rcx,%rdx,1),%bnd0 */
+       asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+                    : : "c" (ptr), "d" (size-1)
+                    :   "memory");
+}
+
+/*
+ * Run BNDMK on bnd0 with 'ptr' in rcx as the base and 'size' - 1 in
+ * rdx as the index, i.e. bounds for a 'size'-byte object at 'ptr'.
+ */
+static __always_inline void mpx_make_bound_helper(unsigned long ptr,
+               unsigned long size)
+{
+       /* F3 0F 1B /r          BNDMK bnd, m64                  */
+       /* f3 0f 1b 04 11       bndmk  (%rcx,%rdx,1),%bnd0      */
+       asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+                    : : "c" (ptr), "d" (size-1)
+                    :   "memory");
+}
+
+/* Run BNDCL: check the address 'ptr' (in rcx) against bnd0's lower bound. */
+static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
+{
+       /* F3 0F 1A /r  BNDCL bnd, r/m64                */
+       /* f3 0f 1a 01  bndcl  (%rcx),%bnd0             */
+       asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
+                    : : "c" (ptr)
+                    :   "memory");
+}
+
+/* Run BNDCU: check the address 'ptr' (in rcx) against bnd0's upper bound. */
+static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
+{
+       /* F2 0F 1A /r  BNDCU bnd, r/m64        */
+       /* f2 0f 1a 01  bndcu  (%rcx),%bnd0     */
+       asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
+                    : : "c" (ptr)
+                    :   "memory");
+}
+
+/*
+ * Run the register-to-register BNDMOV encoded below (bnd0 into bnd2).
+ * The asm statement declares no operands or clobbers.
+ */
+static __always_inline void mpx_movbndreg_helper()
+{
+       /* 66 0F 1B /r  BNDMOV bnd1/m128, bnd2  */
+       /* 66 0f 1b c2  bndmov %bnd0,%bnd2      */
+
+       asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
+}
+
+/* Run BNDMOV to store bnd0 to the memory at 'mem' (address in rcx). */
+static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
+{
+       /* 66 0F 1B /r  BNDMOV bnd1/m128, bnd2  */
+       /* 66 0f 1b 01  bndmov %bnd0,(%rcx)     */
+       asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
+                    : : "c" (mem)
+                    :   "memory");
+}
+
+/* Run BNDMOV to load bnd0 from the memory at 'mem' (address in rcx). */
+static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
+{
+       /* 66 0F 1A /r  BNDMOV bnd1, bnd2/m128  */
+       /* 66 0f 1a 01  bndmov (%rcx),%bnd0     */
+       asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
+                    : : "c" (mem)
+                    :   "memory");
+}
+
+/*
+ * Run BNDSTX on bnd0 with 'ptr_addr' in rcx and 'ptr_val' in rdx,
+ * storing bnd0 into the bounds tables.
+ */
+static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
+               unsigned long ptr_val)
+{
+       /* 0F 1B /r     BNDSTX-Store Extended Bounds Using Address Translation  */
+       /* 0f 1b 04 11  bndstx %bnd0,(%rcx,%rdx,1)                              */
+       asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
+                    : : "c" (ptr_addr), "d" (ptr_val)
+                    :   "memory");
+}
+
+/*
+ * Run BNDLDX into bnd0 with 'ptr_addr' in rcx and 'ptr_val' in rdx,
+ * loading bnd0 from the bounds tables.
+ */
+static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
+               unsigned long ptr_val)
+{
+       /* 0F 1A /r     BNDLDX-Load                     */
+       /* 0f 1a 04 11  bndldx (%rcx,%rdx,1),%bnd0      */
+       asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
+                    : : "c" (ptr_addr), "d" (ptr_val)
+                    :   "memory");
+}
+
+/*
+ * Dump the four bounds registers and the two configuration/status
+ * words found in an xsave buffer.
+ *
+ * Each register is printed as: lower bound, inverted second word,
+ * then the second word as stored.  'line' identifies the call site.
+ */
+void __print_context(void *__print_xsave_buffer, int line)
+{
+       uint64_t *cfg    = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
+       uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
+       int reg;
+
+       eprintf("%s()::%d\n", "print_context", line);
+       for (reg = 0; reg < 4; reg++) {
+               unsigned long lower = (unsigned long)bounds[reg*2];
+               unsigned long upper_raw = (unsigned long)bounds[reg*2+1];
+
+               eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", reg,
+                      lower, ~upper_raw, upper_raw);
+       }
+
+       eprintf("cpcfg: %jx  cpstatus: %jx\n", cfg[0], cfg[1]);
+}
+#define print_context(x) __print_context(x, __LINE__)
+#ifdef DEBUG
+#define dprint_context(x) print_context(x)
+#else
+#define dprint_context(x) do{}while(0)
+#endif
+
+/*
+ * Seed rand() from the clock and reset all four shadow bounds
+ * registers to {0, all-ones}.
+ */
+void init()
+{
+       int reg = 0;
+
+       srand((unsigned int)time(NULL));
+
+       while (reg < 4) {
+               shadow_plb[reg][0] = 0;
+               shadow_plb[reg][1] = ~(unsigned long)0;
+               reg++;
+       }
+}
+
+/*
+ * Source of random numbers for the allocation tests.
+ *
+ * Normally this is just random().  Building with NOT_SO_RANDOM
+ * defined swaps in a fixed arithmetic sequence instead, so that runs
+ * are repeatable.  'line' is supplied by the mpx_random() wrapper and
+ * is not used.
+ */
+long int __mpx_random(int line)
+{
+#ifdef NOT_SO_RANDOM
+       static long fake = 722122311;
+       fake += 563792075;
+       /* This used to name a non-existent variable and did not build */
+       return fake;
+#else
+       return random();
+#endif
+}
+#define mpx_random() __mpx_random(__LINE__)
+
+/*
+ * Make up a pointer-aligned address below MAX_ADDR_TESTED from
+ * rand().  The result is only ever used as a number; nothing is
+ * mapped there by this function.
+ */
+uint8_t *get_random_addr()
+{
+       unsigned long raw = (unsigned long)(rand() % MAX_ADDR_TESTED);
+
+       /* Round down to a multiple of the pointer size */
+       raw -= raw % sizeof(uint8_t *);
+       return (uint8_t *)raw;
+}
+
+/*
+ * Compare the four bounds registers saved in an xsave buffer with the
+ * shadow copies in shadow_plb.
+ *
+ * The second word of each saved register is inverted before it is
+ * compared with the shadow upper bound.  Returns true when all four
+ * match; the first mismatch is reported and ends the comparison with
+ * false.
+ */
+static inline bool compare_context(void *__xsave_buffer)
+{
+       uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
+       int reg;
+
+       for (reg = 0; reg < 4; reg++) {
+               uint64_t hw_lower = bounds[reg*2];
+               uint64_t hw_upper_raw = bounds[reg*2+1];
+
+               dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
+                      reg, (unsigned long)shadow_plb[reg][0],
+                      (unsigned long)shadow_plb[reg][1],
+                      reg, (unsigned long)hw_lower,
+                      ~(unsigned long)hw_upper_raw);
+
+               if ((shadow_plb[reg][0] == hw_lower) &&
+                   (shadow_plb[reg][1] == ~(unsigned long)hw_upper_raw))
+                       continue;
+
+               eprintf("ERROR comparing shadow to real bound register %d\n", reg);
+               eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
+                      (unsigned long)shadow_plb[reg][0],
+                      (unsigned long)shadow_plb[reg][1],
+                      (unsigned long)hw_lower, (unsigned long)hw_upper_raw);
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Shadow of BNDMK: set shadow register 'index' to the bounds of an
+ * 'offset'-byte object starting at 'ptr'.
+ */
+void mkbnd_shadow(uint8_t *ptr, int index, long offset)
+{
+       uint64_t *lo = (uint64_t *)&(shadow_plb[index][0]);
+       uint64_t *hi = (uint64_t *)&(shadow_plb[index][1]);
+       unsigned long base = (unsigned long)ptr;
+
+       *lo = base;
+       /* Upper bound is inclusive: the last byte of the object */
+       *hi = base + offset - 1;
+}
+
+/*
+ * Shadow of BNDCL: count a predicted lower-bound violation when 'ptr'
+ * lies below the lower bound of shadow register 'index'.
+ */
+void check_lowerbound_shadow(uint8_t *ptr, int index)
+{
+       uint64_t lower = *(uint64_t *)&(shadow_plb[index][0]);
+
+       if (lower > (uint64_t)(unsigned long)ptr) {
+               num_lower_brs++;
+               return;
+       }
+       dprintf1("LowerBoundChk passed:%p\n", ptr);
+}
+
+/*
+ * Shadow of BNDCU: count a predicted upper-bound violation when 'ptr'
+ * lies above the upper bound of shadow register 'index'.
+ */
+void check_upperbound_shadow(uint8_t *ptr, int index)
+{
+       uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
+
+       if (*upper < (uint64_t)(unsigned long)ptr) {
+               num_upper_brs++;
+               return;
+       }
+       dprintf1("UpperBoundChk passed:%p\n", ptr);
+}
+
+/* Shadow of register BNDMOV: copy both bounds of 'src' into 'dest'. */
+__always_inline void movbndreg_shadow(int src, int dest)
+{
+       int half;
+
+       /* half 0 is the lower bound, half 1 the upper bound */
+       for (half = 0; half < 2; half++)
+               shadow_plb[dest][half] = shadow_plb[src][half];
+}
+
+/*
+ * Shadow of BNDMOV to memory: write the lower and upper bound of
+ * shadow register 'src' to dest[0] and dest[1].
+ */
+__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
+{
+       dest[0] = *(unsigned long *)&(shadow_plb[src][0]);
+       dest[1] = *(unsigned long *)&(shadow_plb[src][1]);
+}
+
+/*
+ * Shadow of BNDMOV from memory: load the lower and upper bound of
+ * shadow register 'dest' from src[0] and src[1].
+ */
+__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
+{
+       *(unsigned long *)&(shadow_plb[dest][0]) = src[0];
+       *(unsigned long *)&(shadow_plb[dest][1]) = src[1];
+}
+
+/*
+ * Shadow of BNDSTX: remember the bounds of shadow register 'index'
+ * together with the pointer value they were stored for.  Only a
+ * single entry is kept, in shadow_map; 'ptr' plays no part.
+ */
+__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+       /* ptr ignored */
+       shadow_map[0] = (unsigned long)shadow_plb[index][0];
+       shadow_map[1] = (unsigned long)shadow_plb[index][1];
+       shadow_map[2] = (unsigned long)ptr_val;
+
+       dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
+                       index, ptr, ptr_val, ptr_val);
+}
+
+/*
+ * Shadow of BNDLDX: reload shadow register 'index' from the single
+ * remembered entry in shadow_map.
+ *
+ * When 'ptr_val' is the pointer value the entry was stored for, the
+ * saved bounds are restored.  Otherwise the register is reset to
+ * {0, all-ones}.  'ptr' plays no part.
+ */
+void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+       uint8_t *stored_val = (uint8_t *)shadow_map[2];
+       uint64_t saved_lower = shadow_map[0];
+       uint64_t saved_upper = shadow_map[1];
+
+       /* ptr ignored */
+       if (stored_val == ptr_val) {
+               shadow_plb[index][0] = saved_lower;
+               shadow_plb[index][1] = saved_upper;
+               return;
+       }
+
+       dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
+                "because %p != %p\n", __func__, index, ptr,
+                ptr_val, index, stored_val, ptr_val);
+       shadow_plb[index][0] = 0;
+       shadow_plb[index][1] = ~(unsigned long)0;
+}
+
+/*
+ * Test step 0: make bounds in bnd0 for a 0x1800-byte object at 'ptr'.
+ * 'buf' is not used.
+ */
+static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+}
+
+/* Shadow of step 0: record the same bounds in shadow register 0. */
+static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       mkbnd_shadow(ptr, 0, 0x1800);
+}
+
+/*
+ * Test step 1: check ptr-1 against bnd0's lower bound and ptr+0x1800
+ * against its upper bound.  expected_bnd_index is set to 0 only for
+ * the duration of the two checks.  'buf' is not used.
+ */
+static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
+{
+       /* these are hard-coded to check bnd0 */
+       expected_bnd_index = 0;
+       mpx_check_lowerbound_helper((unsigned long)(ptr-1));
+       mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
+       /* reset this since we do not expect any more bounds exceptions */
+       expected_bnd_index = -1;
+}
+
+/*
+ * Shadow of step 1: run the same two checks against shadow register 0,
+ * which counts the violations the hardware is expected to raise.
+ */
+static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       check_lowerbound_shadow(ptr-1, 0);
+       check_upperbound_shadow(ptr+0x1800, 0);
+}
+
+/*
+ * Test step 2: make bounds in bnd0, copy bnd0 to bnd2, store bnd0 to
+ * 'buf', then replace bnd0 with bounds based at ptr+0x12.
+ */
+static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+       mpx_movbndreg_helper();
+       mpx_movbnd2mem_helper(buf);
+       mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+/* Shadow of step 2: the same four operations on the shadow registers. */
+static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       mkbnd_shadow(ptr, 0, 0x1800);
+       movbndreg_shadow(0, 2);
+       movbnd2mem_shadow(0, (unsigned long *)buf);
+       mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+/* Test step 3: load bnd0 back from 'buf'.  'ptr' is not used. */
+static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_movbnd_from_mem_helper(buf);
+}
+
+/* Shadow of step 3: load shadow register 0 from the shadow buffer. */
+static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       movbnd_from_mem_shadow((unsigned long *)buf, 0);
+}
+
+/*
+ * Test step 4: store bnd0 into the bounds tables for address 'buf'
+ * and pointer value 'ptr', then replace bnd0 with bounds based at
+ * ptr+0x12.
+ */
+static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+       mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+/* Shadow of step 4: the same store and make on shadow register 0. */
+static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       stdsc_shadow(0, buf, ptr);
+       mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+/*
+ * Test step 5: load bnd0 from the bounds tables for address 'buf' and
+ * pointer value 'ptr'.
+ */
+static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+}
+
+/* Shadow of step 5: the same load into shadow register 0. */
+static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       lddsc_shadow(0, buf, ptr);
+}
+
+/* Number of mpx_test_helperN() steps, N = 0 .. NR_MPX_TEST_FUNCTIONS-1 */
+#define NR_MPX_TEST_FUNCTIONS 6
+
+/*
+ * For compatibility reasons, MPX will clear the bounds registers
+ * when you make function calls (among other things).  We have to
+ * preserve the registers in between calls to the "helpers" since
+ * they build on each other.
+ *
+ * Be very careful not to make any function calls inside the
+ * helpers, or anywhere else between the xrstor and xsave.
+ *
+ * The macro restores the saved bounds state, runs one hardware
+ * helper, saves the state again, and only then runs the matching
+ * shadow helper.  It expects a variable named 'flags' in the
+ * caller's scope.
+ */
+#define run_helper(helper_nr, buf, buf_shadow, ptr)    do {    \
+       xrstor_state(xsave_test_buf, flags);                    \
+       mpx_test_helper##helper_nr(buf, ptr);                   \
+       xsave_state(xsave_test_buf, flags);                     \
+       mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr);   \
+} while (0)
+
+/*
+ * Run test step 'nr' (0..5) through run_helper(), which executes the
+ * hardware helper and then its shadow.  Any other 'nr' fails the test.
+ *
+ * The switch is spelled out case by case because run_helper() pastes
+ * its first argument into a function name and so needs a literal.
+ */
+static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
+{
+       /* Mask handed to xrstor/xsave by run_helper(); 0x18 == MPX_XSTATES */
+       uint64_t flags = 0x18;
+
+       dprint_context(xsave_test_buf);
+       switch (nr) {
+       case 0:
+               run_helper(0, buf, buf_shadow, ptr);
+               break;
+       case 1:
+               run_helper(1, buf, buf_shadow, ptr);
+               break;
+       case 2:
+               run_helper(2, buf, buf_shadow, ptr);
+               break;
+       case 3:
+               run_helper(3, buf, buf_shadow, ptr);
+               break;
+       case 4:
+               run_helper(4, buf, buf_shadow, ptr);
+               break;
+       case 5:
+               run_helper(5, buf, buf_shadow, ptr);
+               break;
+       default:
+               test_failed();
+               break;
+       }
+       dprint_context(xsave_test_buf);
+}
+
+/* Buffer handed to the shadow helpers in place of the real test buffer */
+unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
+/* Defined outside this file; its return value is logged and checked by zap_everything() */
+extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
+
+/*
+ * Store bounds-table entries for randomly chosen pointer-sized slots
+ * inside a buffer.
+ *
+ * buf:     start of the buffer (aligned up to pointer size internally)
+ * buf_len: length of the buffer in bytes
+ *
+ * Returns the number of entries stored, which is one for every
+ * 'ratio' pointer-sized slots in the buffer.  Slots are picked at
+ * random, so the same slot may be used more than once.
+ */
+long cover_buf_with_bt_entries(void *buf, long buf_len)
+{
+       long i;
+       long nr_to_fill;
+       int ratio = 1000;
+       unsigned long buf_len_in_ptrs;
+
+       /* Fill about 1/1000 (see 'ratio') of the slots with bt entries */
+       nr_to_fill = buf_len / (sizeof(void *) * ratio);
+
+       if (!nr_to_fill)
+               dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
+
+       /* Align the buffer to pointer size */
+       while (((unsigned long)buf) % sizeof(void *)) {
+               buf++;
+               buf_len--;
+       }
+       /* We are storing pointers, so count the buffer in pointer slots */
+       buf_len_in_ptrs = buf_len / sizeof(void *);
+
+       for (i = 0; i < nr_to_fill; i++) {
+               long index = (mpx_random() % buf_len_in_ptrs);
+               void *ptr = buf + index * sizeof(void *);
+               unsigned long ptr_addr = (unsigned long)ptr;
+
+               /* ptr and size can be anything */
+               mpx_make_bound_helper((unsigned long)ptr, 8);
+
+               /*
+                * Take bnd0 and put it into the bounds tables.
+                * "buf + index" is an address inside the buffer where
+                * we are pretending that we are going to put a pointer.
+                * We do not, though, because we will never load entries
+                * from the table, so it doesn't matter.
+                */
+               mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
+               dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
+                               ptr_addr, buf);
+       }
+       return nr_to_fill;
+}
+
+/*
+ * Round 'alignme' down to a multiple of 'align_to'.  The arithmetic
+ * is a bit mask, so 'align_to' is expected to be a power of two.
+ */
+unsigned long align_down(unsigned long alignme, unsigned long align_to)
+{
+       unsigned long low_bits = alignme & (align_to - 1);
+
+       return alignme - low_bits;
+}
+
+/*
+ * Round 'alignme' up to a multiple of 'align_to'.  The arithmetic is
+ * a bit mask, so 'align_to' is expected to be a power of two.
+ */
+unsigned long align_up(unsigned long alignme, unsigned long align_to)
+{
+       unsigned long mask = align_to - 1;
+       unsigned long bumped = alignme + mask;
+
+       return bumped - (bumped & mask);
+}
+
+/*
+ * Using 1MB alignment guarantees that no allocation
+ * will overlap with another's bounds tables.
+ *
+ * We have to cook our own allocator here.  malloc() can
+ * mix other allocations with ours, which means that even
+ * if we free all of our allocations, there might still
+ * be bounds tables for the *areas* since there is other
+ * valid memory there.
+ *
+ * We also can't use malloc() because a free() of an area
+ * might not free it back to the kernel.  We want it
+ * completely unmapped, and malloc() does not guarantee
+ * that.
+ */
+/* Spacing between allocation attempts, and the rounding of each length */
+#ifdef __i386__
+long alignment = 4096;
+long sz_alignment = 4096;
+#else
+long alignment = 1 * MB;
+long sz_alignment = 1 * MB;
+#endif
+/*
+ * Map 'sz' bytes (rounded up to sz_alignment) of anonymous memory at
+ * an address that is a multiple of 'alignment' steps past the
+ * previous allocation.
+ *
+ * Candidate addresses are tried one 'alignment' step at a time; a
+ * mapping that lands anywhere other than the requested address is
+ * unmapped again and the next candidate is tried, wrapping to 0 at
+ * the top of the address range.
+ *
+ * Returns the mapping, or NULL when mmap() itself fails.
+ */
+void *mpx_mini_alloc(unsigned long sz)
+{
+       unsigned long long tries = 0;
+       static void *last;
+       void *ptr;
+       void *try_at;
+
+       sz = align_up(sz, sz_alignment);
+
+       try_at = last + alignment;
+       while (1) {
+               ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
+                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+               if (ptr == MAP_FAILED)
+                       return NULL;
+               if (ptr == try_at)
+                       break;
+
+               /* The kernel picked some other address; give it back */
+               munmap(ptr, sz);
+               try_at += alignment;
+#ifdef __i386__
+               /*
+                * This isn't quite correct for 32-bit binaries
+                * on 64-bit kernels since they can use the
+                * entire 32-bit address space, but it's close
+                * enough.
+                */
+               if (try_at > (void *)0xC0000000)
+#else
+               if (try_at > (void *)0x0000800000000000)
+#endif
+                       try_at = (void *)0x0;
+               if (!(++tries % 10000))
+                       dprintf1("stuck in %s(), tries: %llu\n", __func__, tries);
+               continue;
+       }
+       last = ptr;
+       dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
+       return ptr;
+}
+/*
+ * Unmap an area handed out by mpx_mini_alloc().  'sz' is rounded up
+ * to sz_alignment before the unmap, mirroring the allocation side.
+ * A pointer above 0x100000000000 is reported and fails the test.
+ */
+void mpx_mini_free(void *ptr, long sz)
+{
+       long len;
+
+       dprintf2("%s() ptr: %p\n", __func__, ptr);
+       if ((unsigned long)ptr > 0x100000000000) {
+               dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
+               test_failed();
+       }
+
+       len = align_up(sz, sz_alignment);
+       dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
+       munmap(ptr, len);
+       dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
+}
+
+/* Number of allocation slots tracked by the unmap test */
+#define NR_MALLOCS 100
+/* One slot: a live area from mpx_mini_alloc(), or ptr == NULL when free */
+struct one_malloc {
+       char *ptr;
+       /* value returned by cover_buf_with_bt_entries() for this area */
+       int nr_filled_btes;
+       /* size requested from mpx_mini_alloc(), before its rounding */
+       unsigned long size;
+};
+struct one_malloc mallocs[NR_MALLOCS];
+
+/*
+ * Release allocation slot 'index' if it is in use, and mark it empty.
+ * After the unmap the address is checked to be a multiple of
+ * 'alignment'.
+ */
+void free_one_malloc(int index)
+{
+       struct one_malloc *slot = &mallocs[index];
+       unsigned long addr;
+       unsigned long low_mask;
+
+       if (!slot->ptr)
+               return;
+
+       mpx_mini_free(slot->ptr, slot->size);
+       dprintf4("freed[%d]:  %p\n", index, slot->ptr);
+
+       addr = (unsigned long)slot->ptr;
+       low_mask = alignment-1;
+       dprintf4("lowerbits: %lx / %lx mask: %lx\n", addr,
+                       (addr & low_mask), low_mask);
+       assert((addr & low_mask) == 0);
+
+       slot->ptr = NULL;
+}
+
+/* Threshold the alignment settings are compared against in zap_everything() */
+#ifdef __i386__
+#define MPX_BOUNDS_TABLE_COVERS 4096
+#else
+#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
+#endif
+/*
+ * Free every allocation slot and, when the alignment settings make
+ * that a guarantee, require inspect_me() to report zero afterwards.
+ */
+void zap_everything(void)
+{
+       long before_zap;
+       long after_zap;
+       int slot;
+
+       before_zap = inspect_me(bounds_dir_ptr);
+       dprintf1("zapping everything start: %ld\n", before_zap);
+
+       for (slot = 0; slot < NR_MALLOCS; slot++)
+               free_one_malloc(slot);
+
+       after_zap = inspect_me(bounds_dir_ptr);
+       dprintf1("zapping everything done: %ld\n", after_zap);
+
+       /*
+        * We only guarantee to empty the thing out if our allocations are
+        * exactly aligned on the boundaries of a bounds table.
+        */
+       if ((alignment < MPX_BOUNDS_TABLE_COVERS) ||
+           (sz_alignment < MPX_BOUNDS_TABLE_COVERS))
+               return;
+
+       if (after_zap != 0)
+               test_failed();
+
+       assert(after_zap == 0);
+}
+
+/*
+ * One iteration of the unmap test.
+ *
+ * A random slot is picked.  If it is occupied it is freed first, and
+ * now and then a run of slots or all of them are freed as well.  Then
+ * a new area of random size is allocated into the slot and sprinkled
+ * with bounds-table entries.  Every so often inspect_me() is called
+ * on the bounds directory.
+ */
+void do_one_malloc(void)
+{
+       static int malloc_counter;
+       long sz;
+       int rand_index = (mpx_random() % NR_MALLOCS);
+       struct one_malloc *slot = &mallocs[rand_index];
+       void *ptr = slot->ptr;
+
+       dprintf3("%s() enter\n", __func__);
+
+       if (ptr) {
+               int i;
+
+               dprintf3("freeing one malloc at index: %d\n", rand_index);
+               free_one_malloc(rand_index);
+               if (mpx_random() % (NR_MALLOCS*3) == 3) {
+                       dprintf3("zapping some more\n");
+                       i = rand_index;
+                       while (i < NR_MALLOCS) {
+                               free_one_malloc(i);
+                               i++;
+                       }
+               }
+               if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
+                       zap_everything();
+       }
+
+       /* 1->~1M */
+       sz = (1 + mpx_random() % 1000) * 1000;
+       ptr = mpx_mini_alloc(sz);
+       if (ptr) {
+               dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
+               slot->nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
+               slot->ptr = ptr;
+               slot->size = sz;
+       } else {
+               /*
+                * If we are failing allocations, just assume we
+                * are out of memory and zap everything.
+                */
+               dprintf3("zapping everything because out of memory\n");
+               zap_everything();
+       }
+
+       if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
+               inspect_me(bounds_dir_ptr);
+}
+
+/*
+ * Call 'test_func' repeatedly until more than TEST_DURATION_SECS
+ * seconds have passed.  The function is always called at least once,
+ * including on the pass that notices the time is up.  Progress is
+ * printed when more than a second has passed since the last print,
+ * and once more at the end.
+ */
+void run_timed_test(void (*test_func)(void))
+{
+       static time_t last_print;
+       time_t started = time(NULL);
+       time_t now;
+       long iteration = 0;
+       int done;
+
+       do {
+               now = time(NULL);
+               done = ((now - started) > TEST_DURATION_SECS);
+
+               test_func();
+               iteration++;
+
+               if (done || (now - last_print > 1)) {
+                       printf("iteration %ld complete, OK so far\n", iteration);
+                       last_print = now;
+               }
+       } while (!done);
+}
+
+/*
+ * Entry point of the unmap test: inspect the bounds directory once,
+ * then run do_one_malloc() under the timed-test loop.
+ */
+void check_bounds_table_frees(void)
+{
+       printf("executing unmaptest\n");
+
+       inspect_me(bounds_dir_ptr);
+       run_timed_test(do_one_malloc);
+
+       printf("done with malloc() fun\n");
+}
+
+/*
+ * Report a failed instruction test and fail the run.
+ *
+ * The saved bounds context is always dumped.  For test 5 the bounds
+ * directory entry covering 'buf' is printed as well, followed by the
+ * bounds table entry when the directory entry is valid.
+ */
+void insn_test_failed(int test_nr, int test_round, void *buf,
+               void *buf_shadow, void *ptr)
+{
+       print_context(xsave_test_buf);
+       eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
+
+       if (test_nr == 5) {
+               struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
+               struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
+
+               printf("  bd: %p\n", bd);
+               printf("&bde: %p\n", bde);
+               printf("*bde: %lx\n", *(unsigned long *)bde);
+
+               if (bd_entry_valid(bde)) {
+                       struct mpx_bt_entry *bte;
+
+                       bte = mpx_vaddr_to_bt_entry(buf, bd);
+                       printf(" te: %p\n", bte);
+                       printf("bte[0]: %lx\n", bte->contents[0]);
+                       printf("bte[1]: %lx\n", bte->contents[1]);
+                       printf("bte[2]: %lx\n", bte->contents[2]);
+                       printf("bte[3]: %lx\n", bte->contents[3]);
+               }
+       }
+       test_failed();
+}
+
+/*
+ * Run every MPX test step TEST_ROUNDS times and compare the hardware
+ * bounds registers with the shadow model after each step.
+ *
+ * The process exits with status 20 if any comparison failed, 21 if
+ * fewer steps succeeded than were planned, and 22 if the number of
+ * bounds exceptions seen differs from the number the shadow model
+ * predicted.  A failed buffer allocation aborts.
+ */
+void check_mpx_insns_and_tables(void)
+{
+       int successes = 0;
+       int failures  = 0;
+       int buf_size = (1024*1024);
+       unsigned long *buf = malloc(buf_size);
+       const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
+       int i, j;
+
+       /* Do not hand a NULL buffer to memset() or to the helpers */
+       if (!buf) {
+               perror("unable to allocate test buffer");
+               abort();
+       }
+
+       memset(buf, 0, buf_size);
+       memset(buf_shadow, 0, sizeof(buf_shadow));
+
+       for (i = 0; i < TEST_ROUNDS; i++) {
+               uint8_t *ptr = get_random_addr() + 8;
+
+               for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
+                       dprintf2("starting test %d round %d\n", j, i);
+                       dprint_context(xsave_test_buf);
+                       /*
+                        * test5 loads an address from the bounds tables.
+                        * The load will only complete if 'ptr' matches
+                        * the load and the store, so with random addrs,
+                        * the odds of this are very small.  Make it
+                        * higher by only moving 'ptr' 1/10 times.
+                        */
+                       if (random() % 10 <= 0)
+                               ptr = get_random_addr() + 8;
+                       dprintf3("random ptr{%p}\n", ptr);
+                       dprint_context(xsave_test_buf);
+                       run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
+                       dprint_context(xsave_test_buf);
+                       if (!compare_context(xsave_test_buf)) {
+                               insn_test_failed(j, i, buf, buf_shadow, ptr);
+                               failures++;
+                               goto out;
+                       }
+                       successes++;
+                       dprint_context(xsave_test_buf);
+                       dprintf2("finished test %d round %d\n", j, i);
+                       dprintf3("\n");
+                       dprint_context(xsave_test_buf);
+               }
+       }
+
+out:
+       dprintf2("\nabout to free:\n");
+       free(buf);
+       dprintf1("successes: %d\n", successes);
+       dprintf1(" failures: %d\n", failures);
+       dprintf1("    tests: %d\n", total_nr_tests);
+       dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+       dprintf1("      saw: %d #BRs\n", br_count);
+       if (failures) {
+               eprintf("ERROR: non-zero number of failures\n");
+               exit(20);
+       }
+       if (successes != total_nr_tests) {
+               eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n",
+                               successes, total_nr_tests);
+               exit(21);
+       }
+       if (num_upper_brs + num_lower_brs != br_count) {
+               eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
+                               num_upper_brs, num_lower_brs, br_count);
+               eprintf("successes: %d\n", successes);
+               eprintf(" failures: %d\n", failures);
+               eprintf("    tests: %d\n", total_nr_tests);
+               eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+               eprintf("      saw: %d #BRs\n", br_count);
+               exit(22);
+       }
+}
+
+/*
+ * Walk the address space in strides meant to leave no room for a
+ * bounds table, mapping one page at each step that is not already
+ * usable, then cover each of those addresses with bounds table
+ * entries.
+ *
+ * This is supposed to SIGSEGV nicely once the kernel
+ * can no longer allocate vaddr space.
+ */
+void exhaust_vaddr_space(void)
+{
+#ifdef __i386__
+       unsigned long vaddr_limit = 0xf7788000UL;
+#else
+       unsigned long vaddr_limit = 0x800000000000UL;
+#endif
+       /* Try to make sure there is no room for a bounds table anywhere */
+       unsigned long stride = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
+       unsigned long vaddr;
+
+       dprintf1("%s() start\n", __func__);
+       /* do not start at 0, we aren't allowed to map there */
+       vaddr = PAGE_SIZE;
+       while (vaddr < vaddr_limit) {
+               void *mapped;
+               int advise_ret = madvise((void *)vaddr, PAGE_SIZE, MADV_NORMAL);
+
+               if (advise_ret) {
+                       /* madvise() failed, so try to place a page here */
+                       mapped = mmap((void *)vaddr, PAGE_SIZE,
+                                     PROT_READ|PROT_WRITE,
+                                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+                       if (mapped != (void *)vaddr) {
+                               perror("mmap");
+                               dprintf1("mmap(%lx) ret: %p\n", vaddr, mapped);
+                               break;
+                       }
+                       /* Only log a subset of the successful mappings */
+                       if ((vaddr & 0xffffff) == 0)
+                               dprintf1("mmap(%lx) ret: %p\n", vaddr, mapped);
+               } else {
+                       /*
+                        * madvise() succeeded; presumably something is
+                        * already mapped at this address, so skip it.
+                        */
+                       dprintf1("madvise() %lx ret: %d\n", vaddr, advise_ret);
+               }
+               vaddr += stride;
+       }
+
+       /* Second pass over the same addresses, up to the full limit */
+       for (vaddr = PAGE_SIZE; vaddr < vaddr_limit; vaddr += stride) {
+               dprintf2("covering 0x%lx with bounds table entries\n", vaddr);
+               cover_buf_with_bt_entries((void *)vaddr, PAGE_SIZE);
+       }
+       dprintf1("%s() end\n", __func__);
+       printf("done with vaddr space fun\n");
+}
+
+/*
+ * Run check_mpx_insns_and_tables() through run_timed_test(), printing a
+ * banner on stdout before and after.
+ */
+void mpx_table_test(void)
+{
+       fputs("starting mpx bounds table test\n", stdout);
+       run_timed_test(check_mpx_insns_and_tables);
+       fputs("done with mpx bounds table test\n", stdout);
+}
+
+/*
+ * Entry point.  Performs the MPX setup calls, then runs the tests named
+ * on the command line: "unmaptest", "vaddrexhaust" and/or "tabletest".
+ * Unrecognized arguments are ignored.  If no test was selected,
+ * unmaptest and tabletest are run.  Exits with status 0 when the
+ * selected tests return.
+ */
+int main(int argc, char **argv)
+{
+       int run_unmap = 0;
+       int run_exhaust = 0;
+       int run_table = 0;
+       int argn;
+
+       check_mpx_support();
+       mpx_prepare();
+       /* Constant seed, so random() yields the same sequence every run */
+       srandom(11179);
+
+       bd_incore();
+       init();
+       bd_incore();
+
+       trace_me();
+
+       xsave_state((void *)xsave_test_buf, 0x1f);
+       /* A failed comparison here is only reported; the run continues */
+       if (!compare_context(xsave_test_buf))
+               printf("Init failed\n");
+
+       for (argn = 1; argn < argc; argn++) {
+               const char *arg = argv[argn];
+
+               if (strcmp(arg, "unmaptest") == 0)
+                       run_unmap = 1;
+               else if (strcmp(arg, "vaddrexhaust") == 0)
+                       run_exhaust = 1;
+               else if (strcmp(arg, "tabletest") == 0)
+                       run_table = 1;
+       }
+
+       /*
+        * Nothing requested: fall back to the default set.  The vaddr
+        * exhaustion test is not part of it and only runs when asked
+        * for by name.
+        */
+       if (!run_unmap && !run_exhaust && !run_table) {
+               run_unmap = 1;
+               run_table = 1;
+       }
+
+       if (run_unmap)
+               check_bounds_table_frees();
+       if (run_table)
+               mpx_table_test();
+       if (run_exhaust)
+               exhaust_vaddr_space();
+       printf("%s completed successfully\n", argv[0]);
+       exit(0);
+}
+
+#include "mpx-dig.c"