struct event_constraint *c;
        unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        struct perf_event *e;
-       int i, wmin, wmax, num = 0;
+       int i, wmin, wmax, unsched = 0;
        struct hw_perf_event *hwc;
 
        bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
        /* slow path */
        if (i != n)
-               num = perf_assign_events(cpuc->event_list, n, wmin,
-                                        wmax, assign);
+               unsched = perf_assign_events(cpuc->event_list, n, wmin,
+                                            wmax, assign);
 
        /*
-        * Mark the event as committed, so we do not put_constraint()
-        * in case new events are added and fail scheduling.
+        * In case of success (unsched = 0), mark events as committed,
+        * so we do not put_constraint() in case new events are added
+        * and fail to be scheduled
+        *
+        * We invoke the lower level commit callback to lock the resource
+        *
+        * We do not need to do all of this in case we are called to
+        * validate an event group (assign == NULL)
         */
-       if (!num && assign) {
+       if (!unsched && assign) {
                for (i = 0; i < n; i++) {
                        e = cpuc->event_list[i];
                        e->hw.flags |= PERF_X86_EVENT_COMMITTED;
                        if (x86_pmu.commit_scheduling)
                                x86_pmu.commit_scheduling(cpuc, e, assign[i]);
                }
        }
-       /*
-        * scheduling failed or is just a simulation,
-        * free resources if necessary
-        */
-       if (!assign || num) {
+
+       if (!assign || unsched) {
+
                for (i = 0; i < n; i++) {
                        e = cpuc->event_list[i];
                        /*
                         * do not put_constraint() on committed events,
                         * because they are good to go
                         */
                        if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
                                continue;
 
+                       /*
+                        * release events that failed scheduling
+                        */
                        if (x86_pmu.put_event_constraints)
                                x86_pmu.put_event_constraints(cpuc, e);
                }
        }
        if (x86_pmu.stop_scheduling)
                x86_pmu.stop_scheduling(cpuc);
 
-       return num ? -EINVAL : 0;
+       return unsched ? -EINVAL : 0;
 }
 
 /*
 
 }
 
 static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                            struct perf_event *event)
 {
        struct event_constraint *c;
        return x86_get_event_constraints(cpuc, idx, event);
 }
 
+static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl, *xlo;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid; /* sibling thread */
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+       /*
+        * no exclusion needed
+        */
+       if (!excl_cntrs)
+               return;
+
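+       /*
+        * xl = exclusion state of current HT
+        * xlo = exclusion state of sibling HT
+        */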
+       xlo = &excl_cntrs->states[o_tid];
+       xl = &excl_cntrs->states[tid];
+
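+       /*
+        * flag the start of the scheduling transaction; checked by
+        * intel_put_excl_constraints() to decide whether the lock
+        * is already held
+        */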
+       xl->sched_started = true;
+
+       /*
+        * lock shared state until we are done scheduling
+        * in stop_event_scheduling()
+        * makes scheduling appear as a transaction
+        */
+       WARN_ON_ONCE(!irqs_disabled());
+       raw_spin_lock(&excl_cntrs->lock);
+
+       /*
+        * save initial state of sibling thread
+        */
+       memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl, *xlo;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid; /* sibling thread */
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+       /*
+        * no exclusion needed
+        */
+       if (!excl_cntrs)
+               return;
+
+       xlo = &excl_cntrs->states[o_tid];
+       xl = &excl_cntrs->states[tid];
+
+       /*
+        * make new sibling thread state visible
+        */
+       memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
+
+       xl->sched_started = false;
+       /*
+        * release shared state lock (acquired in intel_start_scheduling())
+        */
+       raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+                          int idx, struct event_constraint *c)
+{
+       struct event_constraint *cx;
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl, *xlo;
+       int is_excl, i;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid; /* alternate */
+
+       /*
+        * validating a group does not require
+        * enforcing cross-thread exclusion
+        */
+       if (cpuc->is_fake)
+               return c;
+
+       /*
+        * event requires exclusive counter access
+        * across HT threads
+        */
+       is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+       /*
+        * xl = state of current HT
+        * xlo = state of sibling HT
+        */
+       xl = &excl_cntrs->states[tid];
+       xlo = &excl_cntrs->states[o_tid];
+
+       cx = c;
+
+       /*
+        * because we modify the constraint, we need
+        * to make a copy. Static constraints come
+        * from static const tables.
+        *
+        * only needed when constraint has not yet
+        * been cloned (marked dynamic)
+        */
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+
+               /* sanity check */
+               if (idx < 0)
+                       return &emptyconstraint;
+
+               /*
+                * grab pre-allocated constraint entry
+                */
+               cx = &cpuc->constraint_list[idx];
+
+               /*
+                * initialize dynamic constraint
+                * with static constraint
+                */
+               memcpy(cx, c, sizeof(*cx));
+
+               /*
+                * mark constraint as dynamic, so we
+                * can free it later on
+                */
+               cx->flags |= PERF_X86_EVENT_DYNAMIC;
+       }
+
+       /*
+        * From here on, the constraint is dynamic.
+        * Either it was just allocated above, or it
+        * was allocated during an earlier invocation
+        * of this function
+        */
+
+       /*
+        * Modify static constraint with current dynamic
+        * state of thread
+        *
+        * EXCLUSIVE: sibling counter measuring exclusive event
+        * SHARED   : sibling counter measuring non-exclusive event
+        * UNUSED   : sibling counter unused
+        */
+       for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+               /*
+                * exclusive event in sibling counter:
+                * our corresponding counter cannot be used
+                * regardless of our event
+                */
+               if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
+                       __clear_bit(i, cx->idxmsk);
+               /*
+                * if we are measuring an exclusive event while the
+                * sibling counter measures a non-exclusive one,
+                * then our counter cannot be used
+                */
+               if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
+                       __clear_bit(i, cx->idxmsk);
+       }
+
+       /*
+        * recompute actual bit weight for scheduling algorithm
+        */
+       cx->weight = hweight64(cx->idxmsk64);
+
+       /*
+        * if we return an empty mask, then switch
+        * back to static empty constraint to avoid
+        * the cost of freeing later on
+        */
+       if (cx->weight == 0)
+               cx = &emptyconstraint;
+
+       return cx;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                           struct perf_event *event)
+{
+       struct event_constraint *c = event->hw.constraint;
+
+       /*
+        * first time only
+        * - static constraint: no change across incremental scheduling calls
+        * - dynamic constraint: handled by intel_get_excl_constraints()
+        */
+       if (!c)
+               c = __intel_get_event_constraints(cpuc, idx, event);
+
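+       /*
+        * if the PMU has cross-HT exclusion state, filter the
+        * constraint through the exclusion scheme
+        */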
+       if (cpuc->excl_cntrs)
+               return intel_get_excl_constraints(cpuc, event, idx, c);
+
+       return c;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+               struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xlo, *xl;
+       unsigned long flags = 0; /* keep compiler happy */
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+
+       WARN_ON_ONCE(!excl_cntrs);
+
+       if (!excl_cntrs)
+               return;
+
+       xl = &excl_cntrs->states[tid];
+       xlo = &excl_cntrs->states[o_tid];
+
+       /*
+        * put_constraint may be called from x86_schedule_events()
+        * which already has the lock held, so make the locking
+        * conditional here
+        */
+       if (!xl->sched_started)
+               raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+
+       /*
+        * if event was actually assigned, then mark the
+        * counter state as unused now
+        */
+       if (hwc->idx >= 0)
+               xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+       if (!xl->sched_started)
+               raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+}
+
 static void
 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
                                        struct perf_event *event)
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
                                        struct perf_event *event)
 {
+       struct event_constraint *c = event->hw.constraint;
+
        intel_put_shared_regs_event_constraints(cpuc, event);
+
+       /*
+        * if the PMU has exclusive counter restrictions, then
+        * all events are subject to them and must call the
+        * put_excl_constraints() routine
+        */
+       if (c && cpuc->excl_cntrs)
+               intel_put_excl_constraints(cpuc, event);
+
+       /* cleanup dynamic constraint */
+       if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
+               event->hw.constraint = NULL;
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
+                                   struct perf_event *event, int cntr)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct event_constraint *c = event->hw.constraint;
+       struct intel_excl_states *xlo, *xl;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid;
+       int is_excl;
+
+       if (cpuc->is_fake || !c)
+               return;
+
+       is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
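+       /*
+        * nothing to commit unless the constraint was cloned (made
+        * dynamic) by intel_get_excl_constraints()
+        */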
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+               return;
+
+       WARN_ON_ONCE(!excl_cntrs);
+
+       if (!excl_cntrs)
+               return;
+
+       xl = &excl_cntrs->states[tid];
+       xlo = &excl_cntrs->states[o_tid];
+
+       WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
+
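+       /*
+        * record the counter as exclusive or shared in the sibling
+        * thread's view; made visible by intel_stop_scheduling()
+        */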
+       if (cntr >= 0) {
+               if (is_excl)
+                       xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+               else
+                       xlo->init_state[cntr] = INTEL_EXCL_SHARED;
+       }
 }
 
 static void intel_pebs_aliases_core2(struct perf_event *event)
                cpuc->constraint_list = NULL;
        }
 
+       /* free per-core exclusion state if we are the last sibling to go offline */
+       c = cpuc->excl_cntrs;
+       if (c) {
+               if (c->core_id == -1 || --c->refcnt == 0)
+                       kfree(c);
+               cpuc->excl_cntrs = NULL;
+       }
+
        fini_debug_store_on_cpu(cpu);
 }