static unsigned long   page_size;
 static unsigned long   mmap_window = 32;
 
-static struct rb_root  threads;
-static struct thread   *last_match;
-
-
 struct sym_ext {
        struct rb_node  node;
        double          percent;
 process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
        char level;
-       struct thread *thread;
        u64 ip = event->ip.ip;
        struct map *map = NULL;
        struct symbol *sym = NULL;
-
-       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->ip.pid);
 
        dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
                (void *)(offset + head),
 static int
 process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
        struct map *map = map__new(&event->mmap, NULL, 0);
-
-       thread = threads__findnew(event->mmap.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->mmap.pid);
 
        dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n",
                (void *)(offset + head),
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
+       struct thread *thread = threads__findnew(event->comm.pid);
 
-       thread = threads__findnew(event->comm.pid, &threads, &last_match);
        dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
 static int
 process_fork_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-       struct thread *parent;
+       struct thread *thread = threads__findnew(event->fork.pid);
+       struct thread *parent = threads__findnew(event->fork.ppid);
 
-       thread = threads__findnew(event->fork.pid, &threads, &last_match);
-       parent = threads__findnew(event->fork.ppid, &threads, &last_match);
        dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
        uint32_t size;
        char *buf;
 
-       register_idle_thread(&threads, &last_match);
+       register_idle_thread();
 
        input = open(input_name, O_RDONLY);
        if (input < 0) {
                return 0;
 
        if (verbose > 3)
-               threads__fprintf(stdout, &threads);
+               threads__fprintf(stdout);
 
        if (verbose > 2)
                dsos__fprintf(stdout);
 
 static char            *cwd;
 static int             cwdlen;
 
-static struct rb_root  threads;
-static struct thread   *last_match;
-
 static struct perf_header *header;
 
 static u64             sample_type;
 {
        char level;
        struct symbol *sym = NULL;
-       struct thread *thread;
        u64 ip = event->ip.ip;
        u64 period = 1;
        struct map *map = NULL;
        void *more_data = event->ip.__more_data;
        struct ip_callchain *chain = NULL;
        int cpumode;
-
-       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->ip.pid);
 
        if (sample_type & PERF_SAMPLE_PERIOD) {
                period = *(u64 *)more_data;
 static int
 process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
        struct map *map = map__new(&event->mmap, cwd, cwdlen);
-
-       thread = threads__findnew(event->mmap.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->mmap.pid);
 
        dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
                (void *)(offset + head),
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-
-       thread = threads__findnew(event->comm.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->comm.pid);
 
        dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
                (void *)(offset + head),
 static int
 process_task_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-       struct thread *parent;
-
-       thread = threads__findnew(event->fork.pid, &threads, &last_match);
-       parent = threads__findnew(event->fork.ppid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->fork.pid);
+       struct thread *parent = threads__findnew(event->fork.ppid);
 
        dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n",
                (void *)(offset + head),
        struct thread *idle;
        int ret;
 
-       idle = register_idle_thread(&threads, &last_match);
+       idle = register_idle_thread();
        thread__comm_adjust(idle);
 
        if (show_threads)
                return 0;
 
        if (verbose > 3)
-               threads__fprintf(stdout, &threads);
+               threads__fprintf(stdout);
 
        if (verbose > 2)
                dsos__fprintf(stdout);
 
 
 static unsigned long           total_comm = 0;
 
-static struct rb_root          threads;
-static struct thread           *last_match;
-
 static struct perf_header      *header;
 static u64                     sample_type;
 
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-
-       thread = threads__findnew(event->comm.tid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->comm.tid);
 
        dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
                (void *)(offset + head),
                die("hm, delta: %Ld < 0 ?\n", delta);
 
 
-       sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
-       sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+       sched_out = threads__findnew(switch_event->prev_pid);
+       sched_in = threads__findnew(switch_event->next_pid);
 
        out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
        if (!out_events) {
                     u64 timestamp,
                     struct thread *this_thread __used)
 {
-       struct work_atoms *atoms;
-       struct thread *thread;
+       struct thread *thread = threads__findnew(runtime_event->pid);
+       struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
 
        BUG_ON(cpu >= MAX_CPUS || cpu < 0);
-
-       thread = threads__findnew(runtime_event->pid, &threads, &last_match);
-       atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
        if (!atoms) {
                thread_atoms_insert(thread);
                atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
        if (!wakeup_event->success)
                return;
 
-       wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
+       wakee = threads__findnew(wakeup_event->pid);
        atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
        if (!atoms) {
                thread_atoms_insert(wakee);
        if (profile_cpu == -1)
                return;
 
-       migrant = threads__findnew(migrate_task_event->pid, &threads, &last_match);
+       migrant = threads__findnew(migrate_task_event->pid);
        atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
        if (!atoms) {
                thread_atoms_insert(migrant);
                die("hm, delta: %Ld < 0 ?\n", delta);
 
 
-       sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
-       sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+       sched_out = threads__findnew(switch_event->prev_pid);
+       sched_in = threads__findnew(switch_event->next_pid);
 
        curr_thread[this_cpu] = sched_in;
 
        if (!(sample_type & PERF_SAMPLE_RAW))
                return 0;
 
-       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+       thread = threads__findnew(event->ip.pid);
 
        if (sample_type & PERF_SAMPLE_TIME) {
                timestamp = *(u64 *)more_data;
 
 static int read_events(void)
 {
-       register_idle_thread(&threads, &last_match);
+       register_idle_thread();
        register_perf_file_handler(&file_handler);
 
        return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
 
 static unsigned long   total = 0;
 static unsigned long   total_comm = 0;
 
-static struct rb_root  threads;
-static struct thread   *last_match;
-
 static struct perf_header *header;
 static u64             sample_type;
 
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-
-       thread = threads__findnew(event->comm.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->comm.pid);
 
        dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
                (void *)(offset + head),
 static int
 process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
        u64 ip = event->ip.ip;
        u64 timestamp = -1;
        u32 cpu = -1;
        u64 period = 1;
        void *more_data = event->ip.__more_data;
-
-       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->ip.pid);
 
        if (sample_type & PERF_SAMPLE_TIME) {
                timestamp = *(u64 *)more_data;
 
 static int __cmd_trace(void)
 {
-       register_idle_thread(&threads, &last_match);
+       register_idle_thread();
        register_perf_file_handler(&file_handler);
 
        return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
 
 #include "util.h"
 #include "debug.h"
 
+static struct rb_root threads;
+static struct thread *last_match;
+
 static struct thread *thread__new(pid_t pid)
 {
        struct thread *self = calloc(1, sizeof(*self));
        return ret;
 }
 
-struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
+struct thread *threads__findnew(pid_t pid)
 {
-       struct rb_node **p = &threads->rb_node;
+       struct rb_node **p = &threads.rb_node;
        struct rb_node *parent = NULL;
        struct thread *th;
 
         * so most of the time we dont have to look up
         * the full rbtree:
         */
-       if (*last_match && (*last_match)->pid == pid)
-               return *last_match;
+       if (last_match && last_match->pid == pid)
+               return last_match;
 
        while (*p != NULL) {
                parent = *p;
                th = rb_entry(parent, struct thread, rb_node);
 
                if (th->pid == pid) {
-                       *last_match = th;
+                       last_match = th;
                        return th;
                }
 
        th = thread__new(pid);
        if (th != NULL) {
                rb_link_node(&th->rb_node, parent, p);
-               rb_insert_color(&th->rb_node, threads);
-               *last_match = th;
+               rb_insert_color(&th->rb_node, &threads);
+               last_match = th;
        }
 
        return th;
 }
 
-struct thread *
-register_idle_thread(struct rb_root *threads, struct thread **last_match)
+struct thread *register_idle_thread(void)
 {
-       struct thread *thread = threads__findnew(0, threads, last_match);
+       struct thread *thread = threads__findnew(0);
 
        if (!thread || thread__set_comm(thread, "swapper")) {
                fprintf(stderr, "problem inserting idle task.\n");
        return 0;
 }
 
-size_t threads__fprintf(FILE *fp, struct rb_root *threads)
+size_t threads__fprintf(FILE *fp)
 {
        size_t ret = 0;
        struct rb_node *nd;
 
-       for (nd = rb_first(threads); nd; nd = rb_next(nd)) {
+       for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
                struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
                ret += thread__fprintf(pos, fp);
 
 };
 
 int thread__set_comm(struct thread *self, const char *comm);
-struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match);
-struct thread *
-register_idle_thread(struct rb_root *threads, struct thread **last_match);
+struct thread *threads__findnew(pid_t pid);
+struct thread *register_idle_thread(void);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
-size_t threads__fprintf(FILE *fp, struct rb_root *threads);
+size_t threads__fprintf(FILE *fp);
 
 void maps__insert(struct rb_root *maps, struct map *map);
 struct map *maps__find(struct rb_root *maps, u64 ip);