 #define PERF_X86_EVENT_COMMITTED       0x8 /* event passed commit_txn */
 #define PERF_X86_EVENT_PEBS_LD_HSW     0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW     0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL            0x80 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_RDPMC_ALLOWED   0x40 /* grant rdpmc permission */
 
 
        unsigned                core_id;        /* per-core: core id */
 };
 
+enum intel_excl_state_type {
+       INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+       INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+       INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
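+/*
+ * Per-hyperthread view of the core's counter exclusion state:
+ * state[] is the currently published per-counter state, init_state[]
+ * a working snapshot used while counter scheduling is in progress.
+ */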
+struct intel_excl_states {
+       enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
+       enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+};
+
+struct intel_excl_cntrs {
+       raw_spinlock_t  lock;
+
+       struct intel_excl_states states[2];
+
+       int             refcnt;         /* per-core: #HT threads */
+       unsigned        core_id;        /* per-core: core id */
+};
+
 #define MAX_LBR_ENTRIES                16
 
 enum {
         * used on Intel NHM/WSM/SNB
         */
        struct intel_shared_regs        *shared_regs;
+       /*
+        * manage exclusive counter access between hyperthreads
+        */
+       struct event_constraint *constraint_list; /* in enable order */
+       struct intel_excl_cntrs         *excl_cntrs; /* per-core exclusive cntrs */
+       int excl_thread_id; /* 0 or 1 */
 
        /*
         * AMD specific bits
 #define EVENT_CONSTRAINT(c, n, m)      \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
+#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
+       __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+                          0, PERF_X86_EVENT_EXCL)
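+
+/*
+ * Illustrative use (example event code and counter mask, not part of this
+ * patch): a model-specific constraint table tags an event as requiring
+ * cross-HT exclusion with e.g.
+ *
+ *     INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),
+ *
+ * which restricts event 0xd0 to counters 0-3 and routes it through the
+ * HT exclusion logic when counters are scheduled.
+ */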
+
 /*
  * The overlap flag marks event constraints with overlapping counter
  * masks. This is the case if the counter mask of such an event is not
  */
 #define PMU_FL_NO_HT_SHARING   0x1 /* no hyper-threading resource sharing */
 #define PMU_FL_HAS_RSP_1       0x2 /* has 2 equivalent offcore_rsp regs   */
+#define PMU_FL_EXCL_CNTRS      0x4 /* has exclusive counter requirements  */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
 
        return regs;
 }
 
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+       struct intel_excl_cntrs *c;
+       int i;
+
+       c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+                        GFP_KERNEL, cpu_to_node(cpu));
+       if (c) {
+               raw_spin_lock_init(&c->lock);
+               for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+                       c->states[0].state[i] = INTEL_EXCL_UNUSED;
+                       c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
+
+                       c->states[1].state[i] = INTEL_EXCL_UNUSED;
+                       c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
+               }
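+               /* not yet bound to any core; set by intel_pmu_cpu_starting() */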
+               c->core_id = -1;
+       }
+       return c;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-       if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
-               return NOTIFY_OK;
+       if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       return NOTIFY_BAD;
+       }
 
-       cpuc->shared_regs = allocate_shared_regs(cpu);
-       if (!cpuc->shared_regs)
-               return NOTIFY_BAD;
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+               size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
+
+               cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+               if (!cpuc->constraint_list) {
+                       kfree(cpuc->shared_regs);
+                       cpuc->shared_regs = NULL;
+                       return NOTIFY_BAD;
+               }
+
+               cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+               if (!cpuc->excl_cntrs) {
+                       kfree(cpuc->constraint_list);
+                       cpuc->constraint_list = NULL;
+                       kfree(cpuc->shared_regs);
+                       cpuc->shared_regs = NULL;
+                       return NOTIFY_BAD;
+               }
+               cpuc->excl_thread_id = 0;
+       }
 
        return NOTIFY_OK;
 }
 
        if (x86_pmu.lbr_sel_map)
                cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
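+       /*
+        * If a sibling thread on this core has already published an
+        * exclusion structure, share it (the pre-allocated copy is queued
+        * for freeing via kfree_on_online) and tell the two hyperthreads
+        * apart with excl_thread_id.
+        */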
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+               for_each_cpu(i, topology_thread_cpumask(cpu)) {
+                       struct intel_excl_cntrs *c;
+
+                       c = per_cpu(cpu_hw_events, i).excl_cntrs;
+                       if (c && c->core_id == core_id) {
+                               cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+                               cpuc->excl_cntrs = c;
+                               cpuc->excl_thread_id = 1;
+                               break;
+                       }
+               }
+               cpuc->excl_cntrs->core_id = core_id;
+               cpuc->excl_cntrs->refcnt++;
+       }
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
        struct intel_shared_regs *pc;
+       struct intel_excl_cntrs *c;
 
        pc = cpuc->shared_regs;
        if (pc) {
                        kfree(pc);
                cpuc->shared_regs = NULL;
        }
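+       /*
+        * The exclusion structure is shared by both hyperthreads of a core:
+        * free it only when the last sibling referencing it goes away (or
+        * when it was never bound to a core). constraint_list is strictly
+        * per CPU and is always freed here.
+        */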
+       c = cpuc->excl_cntrs;
+       if (c) {
+               if (c->core_id == -1 || --c->refcnt == 0)
+                       kfree(c);
+               cpuc->excl_cntrs = NULL;
+               kfree(cpuc->constraint_list);
+               cpuc->constraint_list = NULL;
+       }
 
        fini_debug_store_on_cpu(cpu);
 }