static int                     no_samples                      = 0;
 static int                     sample_address                  = 0;
 static int                     multiplex                       = 0;
+static int                     multiplex_fd                    = -1;	/* fd of the first counter opened while multiplex is set (-1: none yet); other counters redirect output to it */
 
 static long                    samples;
 static struct timeval          last_read;
         */
        if (group && group_fd == -1)
                group_fd = fd[nr_cpu][counter];
+       if (multiplex && multiplex_fd == -1)
+               multiplex_fd = fd[nr_cpu][counter];
 
-       event_array[nr_poll].fd = fd[nr_cpu][counter];
-       event_array[nr_poll].events = POLLIN;
-       nr_poll++;
-
-       mmap_array[nr_cpu][counter].counter = counter;
-       mmap_array[nr_cpu][counter].prev = 0;
-       mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
-       mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-                       PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
-       if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
-               error("failed to mmap with %d (%s)\n", errno, strerror(errno));
-               exit(-1);
-       }
+       if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
+               int ret;
 
-       if (multiplex && fd[nr_cpu][counter] != group_fd)
-               ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, group_fd);
+               ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd);
+               assert(ret != -1);
+       } else {
+               event_array[nr_poll].fd = fd[nr_cpu][counter];
+               event_array[nr_poll].events = POLLIN;
+               nr_poll++;
+
+               mmap_array[nr_cpu][counter].counter = counter;
+               mmap_array[nr_cpu][counter].prev = 0;
+               mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
+               mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+                               PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
+               if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
+                       error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+                       exit(-1);
+               }
+       }
 
        ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
 }
                int hits = samples;
 
                for (i = 0; i < nr_cpu; i++) {
-                       for (counter = 0; counter < nr_counters; counter++)
-                               mmap_read(&mmap_array[i][counter]);
+                       for (counter = 0; counter < nr_counters; counter++) {
+                               if (mmap_array[i][counter].base)
+                                       mmap_read(&mmap_array[i][counter]);
+                       }
                }
 
                if (hits == samples) {
 
 static u64                     run_avg;
 
 static unsigned long           replay_repeat = 10;
+static unsigned long           nr_timestamps;	/* events on a THREAD_SLEEPING atom that reached the timestamp ordering check */
+static unsigned long           unordered_timestamps;	/* subset of the above where sched_out_time > timestamp; reported as a percentage after the TOTAL line */
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtZX"
 
        if (atom->state != THREAD_SLEEPING)
                return;
 
-       if (atom->sched_out_time > timestamp)
+       nr_timestamps++;
+       if (atom->sched_out_time > timestamp) {
+               unordered_timestamps++;
                return;
+       }
 
        atom->state = THREAD_WAIT_CPU;
        atom->wake_up_time = timestamp;
 
        if (!atom_list->nb_atoms)
                return;
+       /*
+        * Ignore idle threads:
+        */
+       if (!atom_list->thread->pid)
+               return;
 
        all_runtime += atom_list->total_runtime;
        all_count += atom_list->nb_atoms;
        }
 
        printf("-----------------------------------------------------------------------------------\n");
-       printf(" TOTAL:            |%9.3f ms |%9Ld |\n",
+       printf(" TOTAL:            |%9.3f ms |%9Ld |",
                (double)all_runtime/1e6, all_count);
+
+       if (unordered_timestamps && nr_timestamps) {
+               printf(" INFO: %.2f%% unordered events.\n",
+                       (double)unordered_timestamps/(double)nr_timestamps*100.0);
+       } else {
+               printf("\n");
+       }
+
        printf("---------------------------------------------\n");
 }
 
        "-a",
        "-R",
        "-M",
-       "-g",
+       "-f",
        "-c", "1",
        "-e", "sched:sched_switch:r",
        "-e", "sched:sched_stat_wait:r",
        "-e", "sched:sched_stat_sleep:r",
        "-e", "sched:sched_stat_iowait:r",
+       "-e", "sched:sched_stat_runtime:r",
        "-e", "sched:sched_process_exit:r",
        "-e", "sched:sched_process_fork:r",
        "-e", "sched:sched_wakeup:r",