--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
static void open_counters(struct perf_evlist *evlist)
 {
-       struct perf_evsel *pos;
+       struct perf_evsel *pos, *first;
 
        if (evlist->cpus->map[0] < 0)
                no_inherit = true;
 
+       first = list_entry(evlist->entries.next, struct perf_evsel, node);
+
        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
+               struct xyarray *group_fd = NULL;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 
+               if (group && pos != first)
+                       group_fd = first->fd;
+
                config_attr(pos, evlist);
 retry_sample_id:
                attr->sample_id_all = sample_id_all_avail ? 1 : 0;
 try_again:
-               if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
+               if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
+                                    group_fd) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES) {
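
The leader/member pattern above recurs in stat, top and the new evlist helper below: the first evsel on the list is opened with a NULL group_fd table and becomes the group leader, and every later event is handed the leader's per-cpu fd table. Ultimately this feeds the group_fd argument of perf_event_open(2); a minimal sketch of that syscall-level contract (illustrative names, not part of the patch):

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Open a leader on one cpu, then attach a member to its group. */
    static int open_group_pair(struct perf_event_attr *leader_attr,
                               struct perf_event_attr *member_attr, int cpu)
    {
            int leader = syscall(__NR_perf_event_open, leader_attr,
                                 -1 /* pid: any task */, cpu,
                                 -1 /* no group yet */, 0);
            if (leader < 0)
                    return -1;
            /* the member is scheduled on/off the PMU together with the leader */
            return syscall(__NR_perf_event_open, member_attr,
                           -1, cpu, leader /* group_fd */, 0);
    }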
 
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
 struct stats                   runtime_dtlb_cache_stats[MAX_NR_CPUS];
 struct stats                   walltime_nsecs_stats;
 
-static int create_perf_stat_counter(struct perf_evsel *evsel)
+static int create_perf_stat_counter(struct perf_evsel *evsel,
+                                   struct perf_evsel *first)
 {
        struct perf_event_attr *attr = &evsel->attr;
+       struct xyarray *group_fd = NULL;
+
+       if (group && evsel != first)
+               group_fd = first->fd;
 
        if (scale)
                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                    PERF_FORMAT_TOTAL_TIME_RUNNING;

        attr->inherit = !no_inherit;
 
        if (system_wide)
-               return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group);
-
+               return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
+                                               group, group_fd);
        if (target_pid == -1 && target_tid == -1) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
 
-       return perf_evsel__open_per_thread(evsel, evsel_list->threads, group);
+       return perf_evsel__open_per_thread(evsel, evsel_list->threads,
+                                          group, group_fd);
 }
 
 /*
 static int run_perf_stat(int argc __used, const char **argv)
 {
        unsigned long long t0, t1;
-       struct perf_evsel *counter;
+       struct perf_evsel *counter, *first;
        int status = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = (argc > 0);
                close(child_ready_pipe[0]);
        }
 
+       first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
+
        list_for_each_entry(counter, &evsel_list->entries, node) {
-               if (create_perf_stat_counter(counter) < 0) {
+               if (create_perf_stat_counter(counter, first) < 0) {
                        if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) {
                                if (verbose)
                                        ui__warning("%s event is not supported by the kernel.\n",
 
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
                goto out_thread_map_delete;
        }
 
-       if (perf_evsel__open_per_thread(evsel, threads, false) < 0) {
+       if (perf_evsel__open_per_thread(evsel, threads, false, NULL) < 0) {
                pr_debug("failed to open counter: %s, "
                         "tweak /proc/sys/kernel/perf_event_paranoid?\n",
                         strerror(errno));
                goto out_thread_map_delete;
        }
 
-       if (perf_evsel__open(evsel, cpus, threads, false) < 0) {
+       if (perf_evsel__open(evsel, cpus, threads, false, NULL) < 0) {
                pr_debug("failed to open counter: %s, "
                         "tweak /proc/sys/kernel/perf_event_paranoid?\n",
                         strerror(errno));
 
                perf_evlist__add(evlist, evsels[i]);
 
-               if (perf_evsel__open(evsels[i], cpus, threads, false) < 0) {
+               if (perf_evsel__open(evsels[i], cpus, threads, false, NULL) < 0) {
                        pr_debug("failed to open counter: %s, "
                                 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
                                 strerror(errno));
 
 
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
 static void start_counters(struct perf_evlist *evlist)
 {
-       struct perf_evsel *counter;
+       struct perf_evsel *counter, *first;
+
+       first = list_entry(evlist->entries.next, struct perf_evsel, node);
 
        list_for_each_entry(counter, &evlist->entries, node) {
                struct perf_event_attr *attr = &counter->attr;
+               struct xyarray *group_fd = NULL;
+
+               if (group && counter != first)
+                       group_fd = first->fd;
 
                attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 
                attr->inherit = inherit;
 try_again:
                if (perf_evsel__open(counter, top.evlist->cpus,
-                                    top.evlist->threads, group) < 0) {
+                                    top.evlist->threads, group,
+                                    group_fd) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES) {
 
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
 {
        evlist->selected = evsel;
 }
+
+int perf_evlist__open(struct perf_evlist *evlist, bool group)
+{
+       struct perf_evsel *evsel, *first;
+       int err, ncpus, nthreads;
+
+       first = list_entry(evlist->entries.next, struct perf_evsel, node);
+
+       list_for_each_entry(evsel, &evlist->entries, node) {
+               struct xyarray *group_fd = NULL;
+
+               if (group && evsel != first)
+                       group_fd = first->fd;
+
+               err = perf_evsel__open(evsel, evlist->cpus, evlist->threads,
+                                      group, group_fd);
+               if (err < 0)
+                       goto out_err;
+       }
+
+       return 0;
+out_err:
+       ncpus = evlist->cpus ? evlist->cpus->nr : 1;
+       nthreads = evlist->threads ? evlist->threads->nr : 1;
+
+       list_for_each_entry_reverse(evsel, &evlist->entries, node)
+               perf_evsel__close(evsel, ncpus, nthreads);
+
+       return err;
+}
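
perf_evlist__open() gives tools a single entry point that opens every event on the list, grouping them under the first evsel when requested, and unwinds on failure. A hypothetical caller, to show the contract (evlist construction elided; the return value is the negative errno from __perf_evsel__open(), and pr_debug() is perf's util/debug.h helper):

    static int open_all(struct perf_evlist *evlist)
    {
            int err = perf_evlist__open(evlist, true /* group */);
            if (err < 0)
                    /* the reverse walk above already closed whatever was opened */
                    pr_debug("failed to open evlist: %s\n", strerror(-err));
            return err;
    }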
 
 
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
 
+int perf_evlist__open(struct perf_evlist *evlist, bool group);
+
 int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
 int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
 #include "thread_map.h"
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
 
 int __perf_evsel__sample_size(u64 sample_type)
 {
 }
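
GROUP_FD mirrors FD() above: fd tables are xyarrays indexed by (cpu, thread), and the leader's fd in thread column 0 of the cpu being opened seeds group_fd for every member on that cpu. Conceptually (illustrative, not part of the patch):

    /* leader's fd for this cpu; equivalent to FD(leader, cpu, 0) */
    int leader_fd = GROUP_FD(leader->fd, cpu);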
 
 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-                             struct thread_map *threads, bool group)
+                             struct thread_map *threads, bool group,
+                             struct xyarray *group_fds)
 {
        int cpu, thread;
        unsigned long flags = 0;
-       int pid = -1;
+       int pid = -1, err;
 
        if (evsel->fd == NULL &&
            perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
-               return -1;
+               return -ENOMEM;
 
        if (evsel->cgrp) {
                flags = PERF_FLAG_PID_CGROUP;
                pid = evsel->cgrp->fd;
        }
 
        for (cpu = 0; cpu < cpus->nr; cpu++) {
-               int group_fd = -1;
+               int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;
 
                for (thread = 0; thread < threads->nr; thread++) {
 
                        if (!evsel->cgrp)
                                pid = threads->map[thread];

                        FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
                                                                     pid,
                                                                     cpus->map[cpu],
                                                                     group_fd, flags);
-                       if (FD(evsel, cpu, thread) < 0)
+                       if (FD(evsel, cpu, thread) < 0) {
+                               err = -errno;
                                goto out_close;
+                       }
 
                        if (group && group_fd == -1)
                                group_fd = FD(evsel, cpu, thread);
                }
                thread = threads->nr;
        } while (--cpu >= 0);
-       return -1;
+       return err;
+}
+
+void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       if (evsel->fd == NULL)
+               return;
+
+       perf_evsel__close_fd(evsel, ncpus, nthreads);
+       perf_evsel__free_fd(evsel);
+       evsel->fd = NULL;
 }
 
 static struct {
 };
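
perf_evsel__close() checks evsel->fd before doing anything, which is what allows the error path in perf_evlist__open() to walk the entire list, including evsels that were never opened. An illustrative single-evsel lifecycle under the new signatures (error handling elided):

    if (perf_evsel__open(evsel, cpus, threads, false, NULL) == 0) {
            /* ... read or mmap the counters ... */
            perf_evsel__close(evsel, cpus->nr, threads->nr);
    }
    /* also safe if the open failed: close returns early on fd == NULL */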
 
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-                    struct thread_map *threads, bool group)
+                    struct thread_map *threads, bool group,
+                    struct xyarray *group_fd)
 {
        if (cpus == NULL) {
                /* Work around old compiler warnings about strict aliasing */
                cpus = &empty_cpu_map.map;
        }

        if (threads == NULL)
                threads = &empty_thread_map.map;
 
-       return __perf_evsel__open(evsel, cpus, threads, group);
+       return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
 }
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
-                            struct cpu_map *cpus, bool group)
+                            struct cpu_map *cpus, bool group,
+                            struct xyarray *group_fd)
 {
-       return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group);
+       return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
+                                 group_fd);
 }
 
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
-                               struct thread_map *threads, bool group)
+                               struct thread_map *threads, bool group,
+                               struct xyarray *group_fd)
 {
-       return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group);
+       return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
+                                 group_fd);
 }
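
Both wrappers reuse the dummy one-entry maps (the elided static struct definitions above), so __perf_evsel__open() always has a cpu map and a thread map to iterate. In per-thread mode the cpu loop runs exactly once with cpu == 0, so a grouped member reads GROUP_FD(group_fds, 0), the leader's fd in the same dummy slot. For example (hypothetical call, assuming `first` is the leader):

    /* open a member event per thread, grouped under `first` */
    perf_evsel__open_per_thread(evsel, threads, true, first->fd);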
 
 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
 
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
-                            struct cpu_map *cpus, bool group);
+                            struct cpu_map *cpus, bool group,
+                            struct xyarray *group_fds);
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
-                               struct thread_map *threads, bool group);
+                               struct thread_map *threads, bool group,
+                               struct xyarray *group_fds);
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-                    struct thread_map *threads, bool group);
+                    struct thread_map *threads, bool group,
+                    struct xyarray *group_fds);
+void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 #define perf_evsel__match(evsel, t, c)         \
        (evsel->attr.type == PERF_TYPE_##t &&   \
 
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
                cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
 
        evsel->attr.inherit = inherit;
-       if (perf_evsel__open(evsel, cpus, threads, group) < 0) {
+       /*
+        * This will group just the fds for this single evsel; to group
+        * multiple events, use evlist.open().
+        */
+       if (perf_evsel__open(evsel, cpus, threads, group, NULL) < 0) {
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
        }
+
+       Py_INCREF(Py_None);
        return Py_None;
 }
 
+static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
+                                  PyObject *args, PyObject *kwargs)
+{
+       struct perf_evlist *evlist = &pevlist->evlist;
+       int group = 0;
+       static char *kwlist[] = { "group", NULL };
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &group))
+               return NULL;
+
+       if (perf_evlist__open(evlist, group) < 0) {
+               PyErr_SetFromErrno(PyExc_OSError);
+               return NULL;
+       }
+
+       Py_INCREF(Py_None);
+       return Py_None;
+}
+
 static PyMethodDef pyrf_evlist__methods[] = {
        {
                .ml_name  = "mmap",
                .ml_flags = METH_VARARGS | METH_KEYWORDS,
                .ml_doc   = PyDoc_STR("mmap the file descriptor table.")
        },
+       {
+               .ml_name  = "open",
+               .ml_meth  = (PyCFunction)pyrf_evlist__open,
+               .ml_flags = METH_VARARGS | METH_KEYWORDS,
+               .ml_doc   = PyDoc_STR("open the file descriptors.")
+       },
        {
                .ml_name  = "poll",
                .ml_meth  = (PyCFunction)pyrf_evlist__poll,