return event->pmu == &pmu;
 }
 
+/*
+ * Event scheduler state:
+ *
+ * Events are assigned by iterating over all events and counters,
+ * taking the events with the least weight first. The current position
+ * of that iteration is kept in struct sched_state.
+ */
+struct sched_state {
+       int     weight;         /* weight class currently being scheduled */
+       int     event;          /* index of the current event in constraints[] */
+       int     counter;        /* counter index: search start, then the pick */
+       int     unassigned;     /* number of events still to be assigned */
+       unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; /* counters already taken */
+};
+
+struct perf_sched {
+       int                     max_weight;     /* highest weight class to try */
+       int                     max_events;     /* number of entries in constraints[] */
+       struct event_constraint **constraints;  /* one constraint pointer per event */
+       struct sched_state      state;          /* current iterator position */
+};
+
+/*
+ * Initialize the iterator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+                           int num, int wmin, int wmax)
+{
+       int idx;
+
+       memset(sched, 0, sizeof(*sched));       /* also zeroes state.counter and state.used */
+       sched->max_events       = num;
+       sched->max_weight       = wmax;
+       sched->constraints      = c;
+
+       for (idx = 0; idx < num; idx++) {       /* find the first event of weight wmin */
+               if (c[idx]->weight == wmin)
+                       break;
+       }
+
+       sched->state.event      = idx;          /* first wmin event, or num if none */
+       sched->state.weight     = wmin;         /* start with the minimum weight */
+       sched->state.unassigned = num;          /* nothing assigned yet */
+}
+
+/*
+ * Claim the first unused counter allowed by the current event's
+ * constraint and store it in state.counter. Return true on success.
+ */
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+       int idx;
+
+       if (!sched->state.unassigned)           /* nothing left to assign */
+               return false;
+
+       if (sched->state.event >= sched->max_events)    /* no valid current event */
+               return false;
+
+       c = sched->constraints[sched->state.event];
+
+       /* Grab the first unused counter in c->idxmsk, starting at state.counter */
+       idx = sched->state.counter;
+       for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+               if (!__test_and_set_bit(idx, sched->state.used))
+                       break;
+       }
+       sched->state.counter = idx;
+
+       if (idx >= X86_PMC_IDX_MAX)             /* no counter was available */
+               return false;
+
+       return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+
+       if (!sched->state.unassigned || !--sched->state.unassigned) /* one fewer to place */
+               return false;
+
+       do {
+               /* advance to the next event index */
+               sched->state.event++;
+               if (sched->state.event >= sched->max_events) {
+                       /* past the last event: rescan from 0 at the next weight */
+                       sched->state.event = 0;
+                       sched->state.weight++;
+                       if (sched->state.weight > sched->max_weight)
+                               return false;   /* all weight classes exhausted */
+               }
+               c = sched->constraints[sched->state.event];
+       } while (c->weight != sched->state.weight);     /* skip events of other weights */
+
+       sched->state.counter = 0;       /* start with first counter */
+
+       return true;
+}
+
+/*
+ * Assign a counter to each event; returns the number left unassigned.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+                             int wmin, int wmax, int *assign)
+{
+       struct perf_sched sched;
+
+       perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+       do {
+               if (!perf_sched_find_counter(&sched))
+                       break;  /* failed: no counter for the current event */
+               if (assign)     /* assign may be NULL; then nothing is recorded */
+                       assign[sched.state.event] = sched.state.counter;
+       } while (perf_sched_next_event(&sched));
+
+       return sched.state.unassigned;  /* 0 when every event got a counter */
+}
+
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
        struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
        unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       int i, j, w, wmax, num = 0;
+       int i, wmin, wmax, num = 0;
        struct hw_perf_event *hwc;
 
        bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-       for (i = 0; i < n; i++) {
+       for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
                constraints[i] = c;
+               wmin = min(wmin, c->weight);
+               wmax = max(wmax, c->weight);
        }
 
        /*
                if (assign)
                        assign[i] = hwc->idx;
        }
-       if (i == n)
-               goto done;
 
-       /*
-        * begin slow path
-        */
+       /* slow path */
+       if (i != n)
+               num = perf_assign_events(constraints, n, wmin, wmax, assign);
 
-       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
-       /*
-        * weight = number of possible counters
-        *
-        * 1    = most constrained, only works on one counter
-        * wmax = least constrained, works on any counter
-        *
-        * assign events to counters starting with most
-        * constrained events.
-        */
-       wmax = x86_pmu.num_counters;
-
-       /*
-        * when fixed event counters are present,
-        * wmax is incremented by 1 to account
-        * for one more choice
-        */
-       if (x86_pmu.num_counters_fixed)
-               wmax++;
-
-       for (w = 1, num = n; num && w <= wmax; w++) {
-               /* for each event */
-               for (i = 0; num && i < n; i++) {
-                       c = constraints[i];
-                       hwc = &cpuc->event_list[i]->hw;
-
-                       if (c->weight != w)
-                               continue;
-
-                       for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
-                               if (!test_bit(j, used_mask))
-                                       break;
-                       }
-
-                       if (j == X86_PMC_IDX_MAX)
-                               break;
-
-                       __set_bit(j, used_mask);
-
-                       if (assign)
-                               assign[i] = j;
-                       num--;
-               }
-       }
-done:
        /*
         * scheduling failed or is just a simulation,
         * free resources if necessary