 #include <signal.h>
 #include <errno.h>
 #include <stddef.h>
+#include <stdbool.h>
 
 static inline pid_t rseq_gettid(void)
 {
 
 static int opt_yield, opt_signal, opt_sleep,
                opt_disable_rseq, opt_threads = 200,
-               opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+               opt_disable_mod = 0, opt_test = 's';
 
 static long long opt_reps = 5000;
 
 
 #include "rseq.h"
 
+static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
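+/*
+ * opt_mo selects the memory ordering passed to the rseq store operations:
+ * relaxed by default, RSEQ_MO_RELEASE when -M is given (it replaces the
+ * former opt_mb flag).
+ */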
+
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+#define TEST_MEMBARRIER
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+       return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+#endif
+
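+/*
+ * When built with BUILDOPT_RSEQ_PERCPU_MM_CID, index per-cpu data with the
+ * memory map concurrency id (mm_cid) rather than the cpu number, and use
+ * the matching rseq critical section variants.
+ */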
+#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
+# define RSEQ_PERCPU   RSEQ_PERCPU_MM_CID
+static
+int get_current_cpu_id(void)
+{
+       return rseq_current_mm_cid();
+}
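+/* mm_cid requires kernel support; check availability before running the test. */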
+static
+bool rseq_validate_cpu_id(void)
+{
+       return rseq_mm_cid_available();
+}
+# ifdef TEST_MEMBARRIER
+/*
+ * Membarrier does not currently support targeting a mm_cid, so
+ * issue the barrier on all cpus.
+ */
+static
+int rseq_membarrier_expedited(int cpu)
+{
+       return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+                             0, 0);
+}
+# endif /* TEST_MEMBARRIER */
+#else
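+/* Otherwise, index per-cpu data with the cpu number. */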
+# define RSEQ_PERCPU   RSEQ_PERCPU_CPU_ID
+static
+int get_current_cpu_id(void)
+{
+       return rseq_cpu_start();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+       return rseq_current_cpu_raw() >= 0;
+}
+# ifdef TEST_MEMBARRIER
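+/*
+ * With cpu number indexing, membarrier can target the cpu which ran the
+ * rseq critical section.
+ */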
+static
+int rseq_membarrier_expedited(int cpu)
+{
+       return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+                             MEMBARRIER_CMD_FLAG_CPU, cpu);
+}
+# endif /* TEST_MEMBARRIER */
+#endif
+
 struct percpu_lock_entry {
        intptr_t v;
 } __attribute__((aligned(128)));
        for (;;) {
                int ret;
 
-               cpu = rseq_cpu_start();
-               ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+               cpu = get_current_cpu_id();
+               ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                        &lock->c[cpu].v,
                                         0, 1, cpu);
                if (rseq_likely(!ret))
                        break;
                do {
                        int cpu;
 
-                       cpu = rseq_cpu_start();
-                       ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+                       cpu = get_current_cpu_id();
+                       ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                       &data->c[cpu].count, 1, cpu);
                } while (rseq_unlikely(ret));
 #ifndef BENCHMARK
                if (i != 0 && !(i % (reps / 10)))
                intptr_t *targetptr, newval, expect;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
                /* Load list->c[cpu].head with single-copy atomicity. */
                expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
                newval = (intptr_t)node;
                targetptr = (intptr_t *)&list->c[cpu].head;
                node->next = (struct percpu_list_node *)expect;
-               ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+               ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                        targetptr, expect, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
                long offset;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
                targetptr = (intptr_t *)&list->c[cpu].head;
                expectnot = (intptr_t)NULL;
                offset = offsetof(struct percpu_list_node, next);
                load = (intptr_t *)&head;
-               ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
-                                                  offset, load, cpu);
+               ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                                targetptr, expectnot,
+                                                offset, load, cpu);
                if (rseq_likely(!ret)) {
                        node = head;
                        break;
                intptr_t offset;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == buffer->c[cpu].buflen)
                        break;
                targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
                newval_final = offset + 1;
                targetptr_final = &buffer->c[cpu].offset;
-               if (opt_mb)
-                       ret = rseq_cmpeqv_trystorev_storev_release(
-                               targetptr_final, offset, targetptr_spec,
-                               newval_spec, newval_final, cpu);
-               else
-                       ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
-                               offset, targetptr_spec, newval_spec,
-                               newval_final, cpu);
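+               /* opt_mo selects relaxed or release ordering for the final store. */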
+               ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
+                       targetptr_final, offset, targetptr_spec,
+                       newval_spec, newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                intptr_t offset;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == 0) {
                head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
                newval = offset - 1;
                targetptr = (intptr_t *)&buffer->c[cpu].offset;
-               ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
+               ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                       targetptr, offset,
                        (intptr_t *)&buffer->c[cpu].array[offset - 1],
                        (intptr_t)head, newval, cpu);
                if (rseq_likely(!ret))
                size_t copylen;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == buffer->c[cpu].buflen)
                copylen = sizeof(item);
                newval_final = offset + 1;
                targetptr_final = &buffer->c[cpu].offset;
-               if (opt_mb)
-                       ret = rseq_cmpeqv_trymemcpy_storev_release(
-                               targetptr_final, offset,
-                               destptr, srcptr, copylen,
-                               newval_final, cpu);
-               else
-                       ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
-                               offset, destptr, srcptr, copylen,
-                               newval_final, cpu);
+               ret = rseq_cmpeqv_trymemcpy_storev(
+                       opt_mo, RSEQ_PERCPU,
+                       targetptr_final, offset,
+                       destptr, srcptr, copylen,
+                       newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                size_t copylen;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == 0)
                copylen = sizeof(*item);
                newval_final = offset - 1;
                targetptr_final = &buffer->c[cpu].offset;
-               ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
-                       offset, destptr, srcptr, copylen,
+               ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                       targetptr_final, offset, destptr, srcptr, copylen,
                        newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
 }
 
 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
-#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+#ifdef TEST_MEMBARRIER
 struct test_membarrier_thread_args {
        int stop;
        intptr_t percpu_list_ptr;
                int ret;
 
                do {
-                       int cpu = rseq_cpu_start();
+                       int cpu = get_current_cpu_id();
 
-                       ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
+                       ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                               &args->percpu_list_ptr,
                                sizeof(struct percpu_list_entry) * cpu, 1, cpu);
                } while (rseq_unlikely(ret));
        }
                free(list->c[i].head);
 }
 
-static int sys_membarrier(int cmd, int flags, int cpu_id)
-{
-       return syscall(__NR_membarrier, cmd, flags, cpu_id);
-}
-
 /*
  * The manager thread swaps per-cpu lists that worker threads see,
  * and validates that there are no unexpected modifications.
 
                /* Make list_b "active". */
                atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
-               if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
-                                       MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+               if (rseq_membarrier_expedited(cpu_a) &&
                                errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
 
                /* Make list_a "active". */
                atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
-               if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
-                                       MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+               if (rseq_membarrier_expedited(cpu_b) &&
                                errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
                abort();
        }
 }
-#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+#else /* TEST_MEMBARRIER */
 void test_membarrier(void)
 {
        fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
                        verbose = 1;
                        break;
                case 'M':
-                       opt_mb = 1;
+                       opt_mo = RSEQ_MO_RELEASE;
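+                       /* -M: use release ordering for rseq stores. */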
                        break;
                default:
                        show_usage(argc, argv);
 
        if (!opt_disable_rseq && rseq_register_current_thread())
                goto error;
+       if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
+               fprintf(stderr, "Error: cpu id getter unavailable\n");
+               goto error;
+       }
        switch (opt_test) {
        case 's':
                printf_verbose("spinlock\n");