return -1;
        }
 
-       atoms->thread = thread;
+       atoms->thread = thread__get(thread);
        INIT_LIST_HEAD(&atoms->work_list);
        __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
        return 0;
 
        } else
                ttrace->entry_pending = true;
 
-       trace->current = thread;
+       if (trace->current != thread) {
+               thread__put(trace->current);
+               trace->current = thread__get(thread);
+       }
 
        return 0;
 }
        }
 
 out_disable:
+       thread__zput(trace->current);
+
        perf_evlist__disable(evlist);
 
        if (!err) {
 
                perf_hpp__set_user_width(symbol_conf.col_width_list_str);
 
        while (1) {
-               const struct thread *thread = NULL;
+               struct thread *thread = NULL;
                const struct dso *dso = NULL;
                int choice = 0,
                    annotate = -2, zoom_dso = -2, zoom_thread = -2,
                                pstack__remove(fstack, &browser->hists->thread_filter);
 zoom_out_thread:
                                ui_helpline__pop();
-                               browser->hists->thread_filter = NULL;
+                               thread__zput(browser->hists->thread_filter);
                                perf_hpp__set_elide(HISTC_THREAD, false);
                        } else {
                                ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
                                                   thread->comm_set ? thread__comm_str(thread) : "",
                                                   thread->tid);
-                               browser->hists->thread_filter = thread;
+                               browser->hists->thread_filter = thread__get(thread);
                                perf_hpp__set_elide(HISTC_THREAD, false);
                                pstack__push(fstack, &browser->hists->thread_filter);
                        }
 
 
        if (thread) {
                rb_erase(&thread->rb_node, &machine->threads);
-               machine->last_match = NULL;
-               thread__delete(thread);
+               if (machine->last_match == thread)
+                       thread__zput(machine->last_match);
+               thread__put(thread);
        }
 
        return 0;
 
                        callchain_init(he->callchain);
 
                INIT_LIST_HEAD(&he->pairs.node);
+               thread__get(he->thread);
        }
 
        return he;
 
 void hist_entry__delete(struct hist_entry *he)
 {
+       thread__zput(he->thread);
        zfree(&he->branch_info);
        zfree(&he->mem_info);
        zfree(&he->stat_acc);
 
        struct rb_root          entries_collapsed;
        u64                     nr_entries;
        u64                     nr_non_filtered_entries;
-       const struct thread     *thread_filter;
+       struct thread           *thread_filter;
        const struct dso        *dso_filter;
        const char              *uid_filter_str;
        const char              *symbol_filter_str;
 
 #include "unwind.h"
 #include "linux/hash.h"
 
+static void machine__remove_thread(struct machine *machine, struct thread *th);
+
 static void dsos__init(struct dsos *dsos)
 {
        INIT_LIST_HEAD(&dsos->head);
        }
 }
 
-void machine__delete_dead_threads(struct machine *machine)
-{
-       struct thread *n, *t;
-
-       list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
-               list_del(&t->node);
-               thread__delete(t);
-       }
-}
-
 void machine__delete_threads(struct machine *machine)
 {
        struct rb_node *nd = rb_first(&machine->threads);
        while (nd) {
                struct thread *t = rb_entry(nd, struct thread, rb_node);
 
-               rb_erase(&t->rb_node, &machine->threads);
                nd = rb_next(nd);
-               thread__delete(t);
+               machine__remove_thread(machine, t); /* rb_erase()s t, hence
+                                                    * the rb_next() above */
        }
 }
 
         * the full rbtree:
         */
        th = machine->last_match;
-       if (th && th->tid == tid) {
-               machine__update_thread_pid(machine, th, pid);
-               return th;
+       if (th != NULL) {
+               if (th->tid == tid) {
+                       machine__update_thread_pid(machine, th, pid);
+                       return th;
+               }
+
+               thread__zput(machine->last_match);
        }
 
        while (*p != NULL) {
                th = rb_entry(parent, struct thread, rb_node);
 
                if (th->tid == tid) {
-                       machine->last_match = th;
+                       machine->last_match = thread__get(th);
                        machine__update_thread_pid(machine, th, pid);
                        return th;
                }
                        thread__delete(th);
                        return NULL;
                }
-
-               machine->last_match = th;
+               /*
+                * It is now in the rbtree, get a ref
+                */
+               thread__get(th);
+               machine->last_match = thread__get(th);
        }
 
        return th;
 
 static void machine__remove_thread(struct machine *machine, struct thread *th)
 {
-       machine->last_match = NULL;
+       if (machine->last_match == th)
+               thread__zput(machine->last_match);
+
        rb_erase(&th->rb_node, &machine->threads);
        /*
-        * We may have references to this thread, for instance in some hist_entry
-        * instances, so just move them to a separate list.
+        * Put it on the dead_threads list first, then drop a reference.
+        * If that was the last one, thread__put() unlinks the thread from
+        * dead_threads again and calls thread__delete() on it.
         */
        list_add_tail(&th->node, &machine->dead_threads);
+       thread__put(th);
 }
 
 int machine__process_fork_event(struct machine *machine, union perf_event *event,
 
 struct machine *machine__new_host(void);
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
 void machine__exit(struct machine *machine);
-void machine__delete_dead_threads(struct machine *machine);
 void machine__delete_threads(struct machine *machine);
 void machine__delete(struct machine *machine);
 
 
        return NULL;
 }
 
-static void perf_session__delete_dead_threads(struct perf_session *session)
-{
-       machine__delete_dead_threads(&session->machines.host);
-}
-
 static void perf_session__delete_threads(struct perf_session *session)
 {
        machine__delete_threads(&session->machines.host);
 void perf_session__delete(struct perf_session *session)
 {
        perf_session__destroy_kernel_maps(session);
-       perf_session__delete_dead_threads(session);
        perf_session__delete_threads(session);
        perf_session_env__delete(&session->header.env);
        machines__exit(&session->machines);
 
        free(thread);
 }
 
+/*
+ * Take a reference on @thread and return it, so the call can be used
+ * directly on the right-hand side of an assignment.
+ *
+ * A NULL @thread is passed through untouched, mirroring thread__put(),
+ * which also accepts NULL. This keeps get/put pairs safe for pointers
+ * that may legitimately be unset.
+ */
+struct thread *thread__get(struct thread *thread)
+{
+       if (thread)
+               ++thread->refcnt;
+       return thread;
+}
+
+/*
+ * Drop one reference on @thread; NULL is accepted and ignored.
+ *
+ * When the count reaches zero the thread is unlinked from the list its
+ * ->node is on and then destroyed with thread__delete().
+ */
+void thread__put(struct thread *thread)
+{
+       if (thread == NULL)
+               return;
+
+       if (--thread->refcnt != 0)
+               return;
+
+       /* That was the last reference: unlink first, then free. */
+       list_del_init(&thread->node);
+       thread__delete(thread);
+}
+
 struct comm *thread__comm(const struct thread *thread)
 {
        if (list_empty(&thread->comm_list))
 
        pid_t                   tid;
        pid_t                   ppid;
        int                     cpu;
+       int                     refcnt;
        char                    shortname[3];
        bool                    comm_set;
        bool                    dead; /* if set thread has exited */
 struct thread *thread__new(pid_t pid, pid_t tid);
 int thread__init_map_groups(struct thread *thread, struct machine *machine);
 void thread__delete(struct thread *thread);
+
+struct thread *thread__get(struct thread *thread);
+void thread__put(struct thread *thread);
+
+/*
+ * Drop the reference held through *@thread and reset the pointer to NULL,
+ * so the caller cannot go on using a thread it no longer holds a
+ * reference on.
+ */
+static inline void __thread__zput(struct thread **thread)
+{
+       thread__put(*thread);
+       *thread = NULL;
+}
+
+/*
+ * Convenience wrapper taking the pointer lvalue itself. The argument is
+ * parenthesized so that any lvalue expression expands with the intended
+ * precedence under the address-of operator.
+ */
+#define thread__zput(thread) __thread__zput(&(thread))
+
 static inline void thread__exited(struct thread *thread)
 {
        thread->dead = true;